ex_ua 0.0.3 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENCE +20 -0
- data/README.md +1 -2
- data/Rakefile +2 -1
- data/ex_ua.gemspec +2 -0
- data/lib/ex_ua/category.rb +16 -22
- data/lib/ex_ua/client.rb +39 -4
- data/lib/ex_ua/item.rb +23 -6
- data/lib/ex_ua/version.rb +1 -1
- data/lib/ex_ua.rb +0 -1
- data/spec/category_spec.rb +8 -6
- data/spec/client_spec.rb +14 -3
- data/spec/data/foreign_video_russia.html +234 -233
- data/spec/data/index.html +149 -220
- data/spec/data/ru_video.html +219 -218
- data/spec/data/search?s=futurama&p=0&per=20 +149 -0
- data/spec/data/video_test.html +422 -420
- data/spec/spec_helper.rb +6 -10
- data/tasks/update_spec_data.rake +20 -0
- metadata +22 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 776595958f2a09a7ecb567195f192810b4727ba4
|
4
|
+
data.tar.gz: 04987baa71a95522a1f932902ceb5c9c33ddfb1c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05ec9b4ed174096081e6e57f787a2893760110f04ffec7513ef961451ca141d1310a0e90ef2d3db1375cdf706474de0aa4b1b448391161aa148384b97dceead3
|
7
|
+
data.tar.gz: 6890c56b7a8907d83e14e6a174d9acd3dde93c4421b067e06c49f544c66b714832fca901f9ec6cdec5adb0549a43a38173f850f6a7ac153188a6945768456ba7
|
data/LICENCE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2013 Andrii Dmytrenko
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -15,8 +15,7 @@ Example usage
|
|
15
15
|
|
16
16
|
|
17
17
|
require 'ex_ua'
|
18
|
-
|
19
|
-
base_categories = client.base_categories('ru') # Gives you array of all base categories for a given language
|
18
|
+
base_categories = ExUA::Client.base_categories('ru') # Gives you array of all base categories for a given language
|
20
19
|
category = base_categories.first # Select first category (usually video)
|
21
20
|
sub_categories = category.categories # Select sub-categories of a category
|
22
21
|
example_video_category = sub_categories.first.categories.first
|
data/Rakefile
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
2
|
require "rspec/core/rake_task"
|
3
|
+
Dir.glob('tasks/*.rake').each { |r| import r }
|
3
4
|
|
4
5
|
desc 'Default: run specs.'
|
5
|
-
task :default => :spec
|
6
|
+
task :default => :spec#[:update_spec_data, :spec]
|
6
7
|
|
7
8
|
desc "Run specs"
|
8
9
|
RSpec::Core::RakeTask.new
|
data/ex_ua.gemspec
CHANGED
@@ -17,10 +17,12 @@ Gem::Specification.new do |s|
|
|
17
17
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
18
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
19
|
s.require_paths = ["lib"]
|
20
|
+
s.license = 'MIT'
|
20
21
|
|
21
22
|
# specify any dependencies here; for example:
|
22
23
|
s.add_development_dependency "rspec", '>= 2.10.0'
|
23
24
|
s.add_development_dependency "rake", '>= 0.9.0'
|
24
25
|
s.add_runtime_dependency "httparty", '>= 0.10.0'
|
25
26
|
s.add_runtime_dependency "nokogiri", '>= 1.5.0'
|
27
|
+
s.add_runtime_dependency "addressable", '>= 2.3.0'
|
26
28
|
end
|
data/lib/ex_ua/category.rb
CHANGED
@@ -1,24 +1,23 @@
|
|
1
1
|
# @author Andrii Dmytrenko
|
2
|
-
require '
|
2
|
+
require 'addressable/uri'
|
3
3
|
module ExUA
|
4
4
|
# Represents a category
|
5
5
|
# @example Usage
|
6
6
|
# #You usually get categories thru ExUA::Client object
|
7
|
-
#
|
8
|
-
# categories = client.base_categories('ru')
|
7
|
+
# categories = ExUA::Client.base_categories('ru')
|
9
8
|
# sub_categories = categories.first.categories
|
10
9
|
# items = sub_categories.first.categories.first.items
|
11
10
|
class Category
|
12
11
|
class NotFound < StandardError; end
|
13
|
-
attr_reader :id,:parent, :
|
12
|
+
attr_reader :id,:parent, :uri
|
14
13
|
|
15
14
|
# @param[ExUA::Client] ex_ua client
|
16
15
|
# @param[Fixnum] id Category id
|
17
16
|
# @param[Hash] options
|
18
|
-
def initialize(
|
19
|
-
@ex_ua = ex_ua
|
17
|
+
def initialize(options={})
|
20
18
|
@id = options[:id]
|
21
|
-
@
|
19
|
+
@uri = Addressable::URI.parse(options[:url] || url_from_id(id))
|
20
|
+
@uri.site = ExUA::BASE_URL
|
22
21
|
@name = options.delete(:name)
|
23
22
|
@parent = options.delete(:parent)
|
24
23
|
end
|
@@ -27,10 +26,6 @@ module ExUA
|
|
27
26
|
"id:#{id} name:'#{name}' page: #{page}"
|
28
27
|
end
|
29
28
|
|
30
|
-
def inspect
|
31
|
-
"#<#{self.class}: #{to_s}>"
|
32
|
-
end
|
33
|
-
|
34
29
|
# Canonical url
|
35
30
|
# @return [String]
|
36
31
|
def canonical_url
|
@@ -62,8 +57,8 @@ module ExUA
|
|
62
57
|
# @return [Array<ExUA::Category>]
|
63
58
|
def categories
|
64
59
|
page_content.search('table.include_0 a b').map do |link|
|
65
|
-
if match = link.parent.attributes["href"].value.match(%r{
|
66
|
-
Category.new(
|
60
|
+
if match = link.parent.attributes["href"].value.match(%r{(?<url>[^?]+)\?r=(?<r>\d+)})
|
61
|
+
Category.new(parent: self, url: match['url'], name: link.text)
|
67
62
|
end
|
68
63
|
end.compact
|
69
64
|
end
|
@@ -82,20 +77,24 @@ module ExUA
|
|
82
77
|
# @return [ExUA::Category]
|
83
78
|
def next
|
84
79
|
raise NotFound, "No link to a next category found" unless next?
|
85
|
-
Category.new(
|
80
|
+
Category.new(id: self.id, url: next_url)
|
86
81
|
end
|
87
82
|
|
88
83
|
# Previous category
|
89
84
|
# @return [ExUA::Category]
|
90
85
|
def prev
|
91
86
|
raise NotFound, "No link to a previous category found" unless prev?
|
92
|
-
Category.new(
|
87
|
+
Category.new(id: self.id, url: prev_url)
|
93
88
|
end
|
94
89
|
|
95
90
|
# Current page number
|
96
91
|
# @return [Fixnum]
|
97
92
|
def page
|
98
|
-
|
93
|
+
uri.query_values["p"].to_i
|
94
|
+
end
|
95
|
+
|
96
|
+
def path
|
97
|
+
uri.path
|
99
98
|
end
|
100
99
|
|
101
100
|
# Download items
|
@@ -111,13 +110,8 @@ module ExUA
|
|
111
110
|
|
112
111
|
protected
|
113
112
|
|
114
|
-
def strip_url(url)
|
115
|
-
return unless url
|
116
|
-
url.sub(ExUA::BASE_URL, '')
|
117
|
-
end
|
118
|
-
|
119
113
|
def page_content
|
120
|
-
@page_content
|
114
|
+
@page_content||=ExUA::Client.instance.get(uri.request_uri)
|
121
115
|
end
|
122
116
|
|
123
117
|
def url_from_id(id)
|
data/lib/ex_ua/client.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'httparty'
|
2
2
|
require 'nokogiri'
|
3
|
+
require 'singleton'
|
4
|
+
require 'addressable/uri'
|
3
5
|
|
4
6
|
module ExUA
|
5
7
|
# Client for ExUA
|
@@ -7,11 +9,29 @@ module ExUA
|
|
7
9
|
# client = ExUA::Client.new
|
8
10
|
# categories = client.base_categories('ru')
|
9
11
|
#
|
12
|
+
class ExUAFetcher
|
13
|
+
include HTTParty
|
14
|
+
no_follow true
|
15
|
+
def self.get_redirect(uri)
|
16
|
+
get Addressable::URI.parse(uri).normalize.to_s
|
17
|
+
rescue HTTParty::RedirectionTooDeep => e
|
18
|
+
e.response["location"]
|
19
|
+
end
|
20
|
+
end
|
10
21
|
class Client
|
22
|
+
include Singleton
|
23
|
+
KNOWN_BASE_CATEGORIES = %w[video audio images texts games software]
|
24
|
+
class<<self
|
25
|
+
[:available_languages, :base_categories, :search].each do |met|
|
26
|
+
define_method(met) do |*args| #delegate to instance
|
27
|
+
instance.public_send(met, *args)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
11
31
|
# List of available languages
|
12
32
|
# @return [Array<String>]
|
13
33
|
def available_languages
|
14
|
-
@available_langauges||=get('/').search('select[name=lang] option').inject({}){|acc,el| acc[el.attributes["value"].value]=el.text;acc}
|
34
|
+
@available_langauges ||= get('/').search('select[name=lang] option').inject({}){|acc,el| acc[el.attributes["value"].value]=el.text;acc}
|
15
35
|
end
|
16
36
|
|
17
37
|
# List of base categories for a given language
|
@@ -20,11 +40,26 @@ module ExUA
|
|
20
40
|
# client.base_categories('ru')
|
21
41
|
# @return [Array<ExUA::Category>]
|
22
42
|
def base_categories(lang)
|
23
|
-
|
43
|
+
KNOWN_BASE_CATEGORIES.map{|cat| Category.new(url: "/#{lang}/#{cat}")}
|
44
|
+
end
|
45
|
+
|
46
|
+
def search(text, page=0, per=20)
|
47
|
+
uri = Addressable::URI.parse("/search?#{Addressable::URI.form_encode(s: text, p: page, per: per)}")
|
48
|
+
page = get(uri)
|
49
|
+
page.search('table.panel tr td').map do |s|
|
50
|
+
s.search('a')[1]
|
51
|
+
end.compact.map do |link|
|
52
|
+
ExUA::Category.new(url: link.attributes['href'], name: link.text)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def get(request_uri)
|
57
|
+
Nokogiri.parse(HTTParty.get(Addressable::URI.join(ExUA::BASE_URL,request_uri).to_s).body)
|
24
58
|
end
|
25
59
|
|
26
|
-
|
27
|
-
|
60
|
+
private
|
61
|
+
def base_categories_names
|
62
|
+
KNOWN_BASE_CATEGORIES
|
28
63
|
end
|
29
64
|
end
|
30
65
|
end
|
data/lib/ex_ua/item.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'addressable/uri'
|
1
2
|
# @author Andrii Dmytrenko
|
2
3
|
module ExUA
|
3
4
|
# Download item
|
@@ -16,15 +17,31 @@ module ExUA
|
|
16
17
|
}
|
17
18
|
end
|
18
19
|
end
|
19
|
-
|
20
|
+
|
21
|
+
# Queries ex.ua to get a real url to fetch data from (follows redirect)
|
22
|
+
def retrieve_real_load_url
|
23
|
+
retrieve_real_url(download_uri)
|
24
|
+
end
|
25
|
+
|
26
|
+
def retrieve_real_get_url
|
27
|
+
retrieve_real_url(get_uri)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Actual download url with ex.ua included
|
20
31
|
# You can add ?fs_id=server_id param to download form #additional_servers
|
21
|
-
# @return[
|
22
|
-
|
23
|
-
|
32
|
+
# @return[Addressable::URI]
|
33
|
+
|
34
|
+
def get_uri
|
35
|
+
@get_url ||= Addressable::URI.join(ExUA::BASE_URL,"/get/#{self.id}")
|
36
|
+
end
|
37
|
+
|
38
|
+
def download_uri
|
39
|
+
@downoadload_url ||= Addressable::URI.join(ExUA::BASE_URL, "/load/#{self.id}")
|
24
40
|
end
|
25
41
|
|
26
|
-
|
27
|
-
|
42
|
+
private
|
43
|
+
def retrieve_real_url(uri)
|
44
|
+
ExUA::ExUAFetcher.get_redirect(uri)
|
28
45
|
end
|
29
46
|
end
|
30
47
|
end
|
data/lib/ex_ua/version.rb
CHANGED
data/lib/ex_ua.rb
CHANGED
data/spec/category_spec.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
describe ExUA::Category do
|
3
|
+
before { stub_client }
|
3
4
|
context "general 1 page category" do
|
4
|
-
subject{ExUA::Category.new(
|
5
|
+
subject{ExUA::Category.new(url: '/ru_video.html')}
|
5
6
|
describe '#categories' do
|
6
7
|
it 'returns list of child categories' do
|
7
8
|
subject.categories.should be_kind_of(Array)
|
@@ -11,23 +12,24 @@ describe ExUA::Category do
|
|
11
12
|
end
|
12
13
|
end
|
13
14
|
its(:items){should_not be_nil}
|
14
|
-
its(:name){
|
15
|
+
its(:name){should_not be_nil}
|
15
16
|
its(:description){should_not be_nil}
|
16
17
|
its(:canonical_url){should_not be_nil}
|
17
18
|
its(:next?){should be_false}
|
18
19
|
its(:prev?){should be_false}
|
20
|
+
its(:uri){should eq(Addressable::URI.parse('http://www.ex.ua/ru_video.html'))}
|
19
21
|
end
|
20
22
|
context "general few pages category" do
|
21
|
-
subject{ExUA::Category.new(
|
23
|
+
subject{ExUA::Category.new(url: '/foreign_video_russia.html')}
|
22
24
|
describe '#next' do
|
23
25
|
it 'returns a category with same url, but different page number' do
|
24
|
-
subject.next.
|
26
|
+
subject.next.uri.request_uri.should eq('/ru/video/foreign?r=23775&p=1')
|
25
27
|
end
|
26
28
|
end
|
27
29
|
describe '#prev' do
|
28
30
|
it 'returns a category with same url, but different page number' do
|
29
31
|
pending 'find a page with prev'
|
30
|
-
expect(subject.prev.
|
32
|
+
expect(subject.prev.uri).to eq('')
|
31
33
|
end
|
32
34
|
it 'raises error when no prev url found' do
|
33
35
|
expect{subject.prev}.to raise_error(ExUA::Category::NotFound)
|
@@ -35,7 +37,7 @@ describe ExUA::Category do
|
|
35
37
|
end
|
36
38
|
end
|
37
39
|
context "item category" do
|
38
|
-
subject{ExUA::Category.new(
|
40
|
+
subject{ExUA::Category.new(url: '/video_test.html')}
|
39
41
|
its(:picture){should_not be_nil}
|
40
42
|
end
|
41
43
|
end
|
data/spec/client_spec.rb
CHANGED
@@ -1,17 +1,28 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
describe ExUA::Client do
|
3
|
+
subject{ described_class }
|
4
|
+
before { stub_client }
|
3
5
|
describe '#available_languages' do
|
4
6
|
it 'returns list of available languages' do
|
5
|
-
|
7
|
+
subject.available_languages.should be_kind_of(Hash)
|
6
8
|
end
|
7
9
|
it 'includes known languages' do
|
8
10
|
known_languages = ["en", "ru", "uk"]
|
9
|
-
(
|
11
|
+
(subject.available_languages.keys & known_languages).sort.should eq(known_languages)
|
10
12
|
end
|
11
13
|
end
|
12
14
|
describe '#base_categories' do
|
13
15
|
it 'lists base categories for a language' do
|
14
|
-
|
16
|
+
subject.base_categories("uk").should be_kind_of(Array)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
describe '#search' do
|
20
|
+
it 'returns list of categories' do
|
21
|
+
subject.search('futurama').should be_kind_of(Array)
|
22
|
+
subject.search('futurama').all?{|cat| cat.kind_of? ExUA::Category}
|
23
|
+
end
|
24
|
+
it 'returns 20 results by default' do
|
25
|
+
subject.search('futurama').size.should eq(20)
|
15
26
|
end
|
16
27
|
end
|
17
28
|
end
|