ex_ua 0.0.3 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENCE +20 -0
- data/README.md +1 -2
- data/Rakefile +2 -1
- data/ex_ua.gemspec +2 -0
- data/lib/ex_ua/category.rb +16 -22
- data/lib/ex_ua/client.rb +39 -4
- data/lib/ex_ua/item.rb +23 -6
- data/lib/ex_ua/version.rb +1 -1
- data/lib/ex_ua.rb +0 -1
- data/spec/category_spec.rb +8 -6
- data/spec/client_spec.rb +14 -3
- data/spec/data/foreign_video_russia.html +234 -233
- data/spec/data/index.html +149 -220
- data/spec/data/ru_video.html +219 -218
- data/spec/data/search?s=futurama&p=0&per=20 +149 -0
- data/spec/data/video_test.html +422 -420
- data/spec/spec_helper.rb +6 -10
- data/tasks/update_spec_data.rake +20 -0
- metadata +22 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 776595958f2a09a7ecb567195f192810b4727ba4
|
4
|
+
data.tar.gz: 04987baa71a95522a1f932902ceb5c9c33ddfb1c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05ec9b4ed174096081e6e57f787a2893760110f04ffec7513ef961451ca141d1310a0e90ef2d3db1375cdf706474de0aa4b1b448391161aa148384b97dceead3
|
7
|
+
data.tar.gz: 6890c56b7a8907d83e14e6a174d9acd3dde93c4421b067e06c49f544c66b714832fca901f9ec6cdec5adb0549a43a38173f850f6a7ac153188a6945768456ba7
|
data/LICENCE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2013 Andrii Dmytrenko
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -15,8 +15,7 @@ Example usage
|
|
15
15
|
|
16
16
|
|
17
17
|
require 'ex_ua'
|
18
|
-
|
19
|
-
base_categories = client.base_categories('ru') # Gives you array of all base categories for a given language
|
18
|
+
base_categories = ExUA::Client.base_categories('ru') # Gives you array of all base categories for a given language
|
20
19
|
category = base_categories.first # Select first category (usually video)
|
21
20
|
sub_categories = category.categories # Select sub-categories of a category
|
22
21
|
example_video_category = sub_categories.first.categories.first
|
data/Rakefile
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
2
|
require "rspec/core/rake_task"
|
3
|
+
Dir.glob('tasks/*.rake').each { |r| import r }
|
3
4
|
|
4
5
|
desc 'Default: run specs.'
|
5
|
-
task :default => :spec
|
6
|
+
task :default => :spec#[:update_spec_data, :spec]
|
6
7
|
|
7
8
|
desc "Run specs"
|
8
9
|
RSpec::Core::RakeTask.new
|
data/ex_ua.gemspec
CHANGED
@@ -17,10 +17,12 @@ Gem::Specification.new do |s|
|
|
17
17
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
18
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
19
|
s.require_paths = ["lib"]
|
20
|
+
s.license = 'MIT'
|
20
21
|
|
21
22
|
# specify any dependencies here; for example:
|
22
23
|
s.add_development_dependency "rspec", '>= 2.10.0'
|
23
24
|
s.add_development_dependency "rake", '>= 0.9.0'
|
24
25
|
s.add_runtime_dependency "httparty", '>= 0.10.0'
|
25
26
|
s.add_runtime_dependency "nokogiri", '>= 1.5.0'
|
27
|
+
s.add_runtime_dependency "addressable", '>= 2.3.0'
|
26
28
|
end
|
data/lib/ex_ua/category.rb
CHANGED
@@ -1,24 +1,23 @@
|
|
1
1
|
# @author Andrii Dmytrenko
|
2
|
-
require '
|
2
|
+
require 'addressable/uri'
|
3
3
|
module ExUA
|
4
4
|
# Represents a category
|
5
5
|
# @example Usage
|
6
6
|
# #You usually get categories thru ExUA::Client object
|
7
|
-
#
|
8
|
-
# categories = client.base_categories('ru')
|
7
|
+
# categories = ExUA::Client.base_categories('ru')
|
9
8
|
# sub_categories = categories.first.categories
|
10
9
|
# items = sub_categories.first.categories.first.items
|
11
10
|
class Category
|
12
11
|
class NotFound < StandardError; end
|
13
|
-
attr_reader :id,:parent, :
|
12
|
+
attr_reader :id,:parent, :uri
|
14
13
|
|
15
14
|
# @param[ExUA::Client] ex_ua client
|
16
15
|
# @param[Fixnum] id Category id
|
17
16
|
# @param[Hash] options
|
18
|
-
def initialize(
|
19
|
-
@ex_ua = ex_ua
|
17
|
+
def initialize(options={})
|
20
18
|
@id = options[:id]
|
21
|
-
@
|
19
|
+
@uri = Addressable::URI.parse(options[:url] || url_from_id(id))
|
20
|
+
@uri.site = ExUA::BASE_URL
|
22
21
|
@name = options.delete(:name)
|
23
22
|
@parent = options.delete(:parent)
|
24
23
|
end
|
@@ -27,10 +26,6 @@ module ExUA
|
|
27
26
|
"id:#{id} name:'#{name}' page: #{page}"
|
28
27
|
end
|
29
28
|
|
30
|
-
def inspect
|
31
|
-
"#<#{self.class}: #{to_s}>"
|
32
|
-
end
|
33
|
-
|
34
29
|
# Canonical url
|
35
30
|
# @return [String]
|
36
31
|
def canonical_url
|
@@ -62,8 +57,8 @@ module ExUA
|
|
62
57
|
# @return [Array<ExUA::Category>]
|
63
58
|
def categories
|
64
59
|
page_content.search('table.include_0 a b').map do |link|
|
65
|
-
if match = link.parent.attributes["href"].value.match(%r{
|
66
|
-
Category.new(
|
60
|
+
if match = link.parent.attributes["href"].value.match(%r{(?<url>[^?]+)\?r=(?<r>\d+)})
|
61
|
+
Category.new(parent: self, url: match['url'], name: link.text)
|
67
62
|
end
|
68
63
|
end.compact
|
69
64
|
end
|
@@ -82,20 +77,24 @@ module ExUA
|
|
82
77
|
# @return [ExUA::Category]
|
83
78
|
def next
|
84
79
|
raise NotFound, "No link to a next category found" unless next?
|
85
|
-
Category.new(
|
80
|
+
Category.new(id: self.id, url: next_url)
|
86
81
|
end
|
87
82
|
|
88
83
|
# Previous category
|
89
84
|
# @return [ExUA::Category]
|
90
85
|
def prev
|
91
86
|
raise NotFound, "No link to a previous category found" unless prev?
|
92
|
-
Category.new(
|
87
|
+
Category.new(id: self.id, url: prev_url)
|
93
88
|
end
|
94
89
|
|
95
90
|
# Current page number
|
96
91
|
# @return [Fixnum]
|
97
92
|
def page
|
98
|
-
|
93
|
+
uri.query_values["p"].to_i
|
94
|
+
end
|
95
|
+
|
96
|
+
def path
|
97
|
+
uri.path
|
99
98
|
end
|
100
99
|
|
101
100
|
# Download items
|
@@ -111,13 +110,8 @@ module ExUA
|
|
111
110
|
|
112
111
|
protected
|
113
112
|
|
114
|
-
def strip_url(url)
|
115
|
-
return unless url
|
116
|
-
url.sub(ExUA::BASE_URL, '')
|
117
|
-
end
|
118
|
-
|
119
113
|
def page_content
|
120
|
-
@page_content
|
114
|
+
@page_content||=ExUA::Client.instance.get(uri.request_uri)
|
121
115
|
end
|
122
116
|
|
123
117
|
def url_from_id(id)
|
data/lib/ex_ua/client.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'httparty'
|
2
2
|
require 'nokogiri'
|
3
|
+
require 'singleton'
|
4
|
+
require 'addressable/uri'
|
3
5
|
|
4
6
|
module ExUA
|
5
7
|
# Client for ExUA
|
@@ -7,11 +9,29 @@ module ExUA
|
|
7
9
|
# client = ExUA::Client.new
|
8
10
|
# categories = client.base_categories('ru')
|
9
11
|
#
|
12
|
+
class ExUAFetcher
|
13
|
+
include HTTParty
|
14
|
+
no_follow true
|
15
|
+
def self.get_redirect(uri)
|
16
|
+
get Addressable::URI.parse(uri).normalize.to_s
|
17
|
+
rescue HTTParty::RedirectionTooDeep => e
|
18
|
+
e.response["location"]
|
19
|
+
end
|
20
|
+
end
|
10
21
|
class Client
|
22
|
+
include Singleton
|
23
|
+
KNOWN_BASE_CATEGORIES = %w[video audio images texts games software]
|
24
|
+
class<<self
|
25
|
+
[:available_languages, :base_categories, :search].each do |met|
|
26
|
+
define_method(met) do |*args| #delegate to instance
|
27
|
+
instance.public_send(met, *args)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
11
31
|
# List of available languages
|
12
32
|
# @return [Array<String>]
|
13
33
|
def available_languages
|
14
|
-
@available_langauges||=get('/').search('select[name=lang] option').inject({}){|acc,el| acc[el.attributes["value"].value]=el.text;acc}
|
34
|
+
@available_langauges ||= get('/').search('select[name=lang] option').inject({}){|acc,el| acc[el.attributes["value"].value]=el.text;acc}
|
15
35
|
end
|
16
36
|
|
17
37
|
# List of base categories for a given language
|
@@ -20,11 +40,26 @@ module ExUA
|
|
20
40
|
# client.base_categories('ru')
|
21
41
|
# @return [Array<ExUA::Category>]
|
22
42
|
def base_categories(lang)
|
23
|
-
|
43
|
+
KNOWN_BASE_CATEGORIES.map{|cat| Category.new(url: "/#{lang}/#{cat}")}
|
44
|
+
end
|
45
|
+
|
46
|
+
def search(text, page=0, per=20)
|
47
|
+
uri = Addressable::URI.parse("/search?#{Addressable::URI.form_encode(s: text, p: page, per: per)}")
|
48
|
+
page = get(uri)
|
49
|
+
page.search('table.panel tr td').map do |s|
|
50
|
+
s.search('a')[1]
|
51
|
+
end.compact.map do |link|
|
52
|
+
ExUA::Category.new(url: link.attributes['href'], name: link.text)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def get(request_uri)
|
57
|
+
Nokogiri.parse(HTTParty.get(Addressable::URI.join(ExUA::BASE_URL,request_uri).to_s).body)
|
24
58
|
end
|
25
59
|
|
26
|
-
|
27
|
-
|
60
|
+
private
|
61
|
+
def base_categories_names
|
62
|
+
KNOWN_BASE_CATEGORIES
|
28
63
|
end
|
29
64
|
end
|
30
65
|
end
|
data/lib/ex_ua/item.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'addressable/uri'
|
1
2
|
# @author Andrii Dmytrenko
|
2
3
|
module ExUA
|
3
4
|
# Download item
|
@@ -16,15 +17,31 @@ module ExUA
|
|
16
17
|
}
|
17
18
|
end
|
18
19
|
end
|
19
|
-
|
20
|
+
|
21
|
+
# Queries ex.ua to get a real url to fetch data from (follows redirect)
|
22
|
+
def retrieve_real_load_url
|
23
|
+
retrieve_real_url(download_uri)
|
24
|
+
end
|
25
|
+
|
26
|
+
def retrieve_real_get_url
|
27
|
+
retrieve_real_url(get_uri)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Actual download url with ex.ua included
|
20
31
|
# You can add ?fs_id=server_id param to download form #additional_servers
|
21
|
-
# @return[
|
22
|
-
|
23
|
-
|
32
|
+
# @return[Addressable::URI]
|
33
|
+
|
34
|
+
def get_uri
|
35
|
+
@get_url ||= Addressable::URI.join(ExUA::BASE_URL,"/get/#{self.id}")
|
36
|
+
end
|
37
|
+
|
38
|
+
def download_uri
|
39
|
+
@downoadload_url ||= Addressable::URI.join(ExUA::BASE_URL, "/load/#{self.id}")
|
24
40
|
end
|
25
41
|
|
26
|
-
|
27
|
-
|
42
|
+
private
|
43
|
+
def retrieve_real_url(uri)
|
44
|
+
ExUA::ExUAFetcher.get_redirect(uri)
|
28
45
|
end
|
29
46
|
end
|
30
47
|
end
|
data/lib/ex_ua/version.rb
CHANGED
data/lib/ex_ua.rb
CHANGED
data/spec/category_spec.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
describe ExUA::Category do
|
3
|
+
before { stub_client }
|
3
4
|
context "general 1 page category" do
|
4
|
-
subject{ExUA::Category.new(
|
5
|
+
subject{ExUA::Category.new(url: '/ru_video.html')}
|
5
6
|
describe '#categories' do
|
6
7
|
it 'returns list of child categories' do
|
7
8
|
subject.categories.should be_kind_of(Array)
|
@@ -11,23 +12,24 @@ describe ExUA::Category do
|
|
11
12
|
end
|
12
13
|
end
|
13
14
|
its(:items){should_not be_nil}
|
14
|
-
its(:name){
|
15
|
+
its(:name){should_not be_nil}
|
15
16
|
its(:description){should_not be_nil}
|
16
17
|
its(:canonical_url){should_not be_nil}
|
17
18
|
its(:next?){should be_false}
|
18
19
|
its(:prev?){should be_false}
|
20
|
+
its(:uri){should eq(Addressable::URI.parse('http://www.ex.ua/ru_video.html'))}
|
19
21
|
end
|
20
22
|
context "general few pages category" do
|
21
|
-
subject{ExUA::Category.new(
|
23
|
+
subject{ExUA::Category.new(url: '/foreign_video_russia.html')}
|
22
24
|
describe '#next' do
|
23
25
|
it 'returns a category with same url, but different page number' do
|
24
|
-
subject.next.
|
26
|
+
subject.next.uri.request_uri.should eq('/ru/video/foreign?r=23775&p=1')
|
25
27
|
end
|
26
28
|
end
|
27
29
|
describe '#prev' do
|
28
30
|
it 'returns a category with same url, but different page number' do
|
29
31
|
pending 'find a page with prev'
|
30
|
-
expect(subject.prev.
|
32
|
+
expect(subject.prev.uri).to eq('')
|
31
33
|
end
|
32
34
|
it 'raises error when no prev url found' do
|
33
35
|
expect{subject.prev}.to raise_error(ExUA::Category::NotFound)
|
@@ -35,7 +37,7 @@ describe ExUA::Category do
|
|
35
37
|
end
|
36
38
|
end
|
37
39
|
context "item category" do
|
38
|
-
subject{ExUA::Category.new(
|
40
|
+
subject{ExUA::Category.new(url: '/video_test.html')}
|
39
41
|
its(:picture){should_not be_nil}
|
40
42
|
end
|
41
43
|
end
|
data/spec/client_spec.rb
CHANGED
@@ -1,17 +1,28 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
describe ExUA::Client do
|
3
|
+
subject{ described_class }
|
4
|
+
before { stub_client }
|
3
5
|
describe '#available_languages' do
|
4
6
|
it 'returns list of available languages' do
|
5
|
-
|
7
|
+
subject.available_languages.should be_kind_of(Hash)
|
6
8
|
end
|
7
9
|
it 'includes known languages' do
|
8
10
|
known_languages = ["en", "ru", "uk"]
|
9
|
-
(
|
11
|
+
(subject.available_languages.keys & known_languages).sort.should eq(known_languages)
|
10
12
|
end
|
11
13
|
end
|
12
14
|
describe '#base_categories' do
|
13
15
|
it 'lists base categories for a language' do
|
14
|
-
|
16
|
+
subject.base_categories("uk").should be_kind_of(Array)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
describe '#search' do
|
20
|
+
it 'returns list of categories' do
|
21
|
+
subject.search('futurama').should be_kind_of(Array)
|
22
|
+
subject.search('futurama').all?{|cat| cat.kind_of? ExUA::Category}
|
23
|
+
end
|
24
|
+
it 'returns 20 results by default' do
|
25
|
+
subject.search('futurama').size.should eq(20)
|
15
26
|
end
|
16
27
|
end
|
17
28
|
end
|