yt_util 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/yt_util/scrape.rb +69 -40
- data/lib/yt_util/try.rb +102 -0
- data/lib/yt_util/version.rb +1 -1
- data/lib/yt_util.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e7b1ed94c177fb7d05f71eae24e87db5467d01c1
|
4
|
+
data.tar.gz: 487ea160d323c3b8ef432d91523d2afbc1d44a0b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d529e7cdf11e6b2f4b5e844eddceec0886213894fa87aa70c642042a325a58ac20684c536cb696a5e2ca4111e17c194294a4ead6692f2d940dd16899b5aecf0c
|
7
|
+
data.tar.gz: cfa6c69365757262e89dd62ee777c9a7dcef6eede3093393eec9ada9a3b0dd34270d2a9fdadcdc5d656f48af51b22b7db7cc3ec3fa3ac8523eb8593b19808669
|
data/lib/yt_util/scrape.rb
CHANGED
@@ -5,69 +5,98 @@ module YtUtil
|
|
5
5
|
# Search operators (OR, ranges, etc.): https://support.google.com/websearch/answer/136861?hl=en
|
6
6
|
# Punctuation in search queries: https://support.google.com/websearch/answer/2466433
|
7
7
|
|
8
|
-
def self.
|
9
|
-
|
10
|
-
|
11
|
-
get("https://www.youtube.com/results?search_query=#{Addressable::URI.parse(search).normalize + filters}")
|
12
|
-
parse_query(results.parser)
|
8
|
+
def self.raw_query(search, filters = "")
|
9
|
+
raise "Invalid object type" unless search.is_a? String and filters.is_a? String
|
10
|
+
request_query(search,filters)
|
13
11
|
end
|
14
12
|
|
15
|
-
def self.
|
16
|
-
result =
|
17
|
-
|
13
|
+
def self.query(search = nil, filters = "", &qry)
|
14
|
+
result = qry.try(:call) || raw_query(search,filters)
|
15
|
+
parse_query(result)
|
18
16
|
end
|
19
17
|
|
20
|
-
def self.
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
18
|
+
def self.raw_video_stats(video_code)
|
19
|
+
raise "Invalid object type" unless video_code.is_a? String
|
20
|
+
raise "Invalid video code" unless video_code.length == 11
|
21
|
+
request_video_stats(video_code)
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.video_stats(video_code = nil, &qry)
|
25
|
+
result = qry.try(:call) || raw_video_stats(video_code)
|
26
|
+
parse_video_page(video_code, result)
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.raw_user_stats(username)
|
30
|
+
raise "Invalid object type" unless username.is_a? String
|
31
|
+
request_user_stats(username)
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.user_stats(username = nil, &qry)
|
35
|
+
result = qry.try(:call) || raw_user_stats(username)
|
36
|
+
parse_user(result)
|
25
37
|
end
|
26
38
|
|
27
39
|
private
|
40
|
+
def self.request(web_request)
|
41
|
+
try { Mechanize.new.tap { |i| i.follow_meta_refresh = true }.get(web_request).parser } ||
|
42
|
+
Nokogiri::HTML(open(web_request))
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.request_query(search, filters = "")
|
46
|
+
web_request = "https://www.youtube.com/results?search_query=#{Addressable::URI.parse(search).normalize + filters}"
|
47
|
+
request(web_request)
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.request_video_stats(video_code)
|
51
|
+
web_request = "https://www.youtube.com/watch?v=#{video_code}"
|
52
|
+
request(web_request)
|
53
|
+
end
|
54
|
+
|
55
|
+
def self.request_user_stats(username)
|
56
|
+
web_request = "https://www.youtube.com/user/#{username}/about"
|
57
|
+
request(web_request)
|
58
|
+
end
|
59
|
+
|
28
60
|
def self.parse_query(query_result)
|
29
61
|
query_result.css("ol.item-section > li")[1..-1].map do |result|
|
30
62
|
{
|
31
|
-
title: result.css("div:nth-child(1)").css("div:nth-child(2)").css("h3").text,
|
32
|
-
video: result.css("div:nth-child(1)").css("div:nth-child(2)").css("h3 > a").first[:href].dup.tap{|i|i.replace i[(i.index("=").to_i+1)..-1]},
|
33
|
-
views:
|
34
|
-
new: !!result.css("div:nth-child(1)").css("div:nth-child(2)").css("div:nth-child(4)").css("ul:nth-child(1)").text["New"],
|
35
|
-
hd: !!result.css("div:nth-child(1)").css("div:nth-child(2)").css("div:nth-child(4)").css("ul:nth-child(1)").text["HD"],
|
36
|
-
description: result.css("div:nth-child(1)").css("div:nth-child(2)").css("
|
37
|
-
length: result.css("div:nth-child(1)").css("div:nth-child(1)").css("a:nth-child(1)").css("span:nth-child(2)").text
|
63
|
+
title: try {result.css("div:nth-child(1)").css("div:nth-child(2)").css("h3").text},
|
64
|
+
video: try {result.css("div:nth-child(1)").css("div:nth-child(2)").css("h3 > a").first[:href].dup.tap{|i|i.replace i[(i.index("=").to_i+1)..-1]}},
|
65
|
+
views: try {result.css('li').select {|i| i.text =~ /^[\d,]{1,} views/ }.first.text.split.first.gsub(",","_").to_i},
|
66
|
+
new: try {!!result.css("div:nth-child(1)").css("div:nth-child(2)").css("div:nth-child(4)").css("ul:nth-child(1)").text["New"]},
|
67
|
+
hd: try {!!result.css("div:nth-child(1)").css("div:nth-child(2)").css("div:nth-child(4)").css("ul:nth-child(1)").text["HD"]},
|
68
|
+
description: try {result.css("div:nth-child(1)").css("div:nth-child(2)").css(".yt-lockup-description").text},
|
69
|
+
length: try {result.css("div:nth-child(1)").css("div:nth-child(1)").css("a:nth-child(1)").css("span:nth-child(2)").text}
|
38
70
|
}
|
39
71
|
end
|
40
72
|
end
|
41
73
|
|
42
|
-
def self.parse_video_page(query_result)
|
74
|
+
def self.parse_video_page(video_code, query_result)
|
43
75
|
{
|
44
|
-
video: video_code,
|
45
|
-
user_name: query_result.css('a.
|
46
|
-
description: query_result.css('p#eow-description').text,
|
47
|
-
category: query_result.css('
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
dislikes: String(/(\d+)/.match(query_result.css('button#watch-dislike').text.strip.gsub(',', ''))).to_i*1000,
|
54
|
-
published: query_result.css('strong.watch-time-text').text[13..-1],
|
55
|
-
license: query_result.css('li.watch-meta-item:nth-child(2)').text.gsub("\n", '').strip.tap {|i| i.replace i[i.index(' ').to_i..-1].strip}
|
76
|
+
video: video_code || try {query_result.css('meta').select {|i| i.attributes["property"].try(:value) =~ /og:url/}.first["content"].match(/\?v=(.+)/)[1]},
|
77
|
+
user_name: try {query_result.css('li').css('a').select{|i| i.text =~ / by [a-z0-9]{1,}/i}.map {|i| i.text.match(/by ([a-z0-9]{1,})/i)[1]}.first},
|
78
|
+
description: try {query_result.css('p#eow-description').text},
|
79
|
+
category: try {query_result.css('.watch-meta-item').css('a').text},
|
80
|
+
views: try {String(/(\d+)/.match(query_result.css('div.watch-view-count').text.strip.gsub(',', ''))).to_i},
|
81
|
+
likes: try {String(/(\d+)/.match(query_result.css('button#watch-like').text.strip.gsub(',', ''))).to_i},
|
82
|
+
dislikes: try {String(/(\d+)/.match(query_result.css('button#watch-dislike').text.strip.gsub(',', ''))).to_i},
|
83
|
+
published: try {query_result.css('strong.watch-time-text').text.match(/ on ([a-z0-9, ]{11,})/i)[1]},
|
84
|
+
license: try {query_result.css('li.watch-meta-item:nth-child(2)').text.gsub("\n", '').strip.tap {|i| i.replace i[i.index(' ').to_i..-1].strip}}
|
56
85
|
}
|
57
86
|
end
|
58
87
|
|
59
88
|
def self.parse_user(query_result)
|
60
|
-
views_n_subs = query_result.css('.about-stats').
|
89
|
+
views_n_subs = try {query_result.css('.about-stats').
|
61
90
|
css('li').take(2).map{|i| i = i.text.strip; {
|
62
91
|
i.match(/[a-z]+/)[0] => i.match(/[\d,]+/)[0]}
|
63
|
-
}.inject(:update)
|
92
|
+
}.inject(:update)}
|
64
93
|
|
65
94
|
{
|
66
|
-
description: query_result.css('.about-description').css('p').text,
|
67
|
-
link: query_result.css('a[title="Google+"]')[0]["href"],
|
68
|
-
views: views_n_subs["views"],
|
69
|
-
subscribers: views_n_subs["subscribers"],
|
70
|
-
joined: query_result.css('.about-stats').css('.joined-date').text.strip
|
95
|
+
description: try {query_result.css('.about-description').css('p').text},
|
96
|
+
link: try {query_result.css('a[title="Google+"]')[0]["href"]},
|
97
|
+
views: try {views_n_subs["views"]},
|
98
|
+
subscribers: try {views_n_subs["subscribers"]},
|
99
|
+
joined: try {query_result.css('.about-stats').css('.joined-date').text.strip}
|
71
100
|
}
|
72
101
|
end
|
73
102
|
|
data/lib/yt_util/try.rb
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
unless defined? try
|
2
|
+
class Object
|
3
|
+
# Invokes the public method whose name goes as first argument just like
|
4
|
+
# +public_send+ does, except that if the receiver does not respond to it the
|
5
|
+
# call returns +nil+ rather than raising an exception.
|
6
|
+
#
|
7
|
+
# This method is defined to be able to write
|
8
|
+
#
|
9
|
+
# @person.try(:name)
|
10
|
+
#
|
11
|
+
# instead of
|
12
|
+
#
|
13
|
+
# @person.name if @person
|
14
|
+
#
|
15
|
+
# +try+ calls can be chained:
|
16
|
+
#
|
17
|
+
# @person.try(:spouse).try(:name)
|
18
|
+
#
|
19
|
+
# instead of
|
20
|
+
#
|
21
|
+
# @person.spouse.name if @person && @person.spouse
|
22
|
+
#
|
23
|
+
# +try+ will also return +nil+ if the receiver does not respond to the method:
|
24
|
+
#
|
25
|
+
# @person.try(:non_existing_method) # => nil
|
26
|
+
#
|
27
|
+
# instead of
|
28
|
+
#
|
29
|
+
# @person.non_existing_method if @person.respond_to?(:non_existing_method) # => nil
|
30
|
+
#
|
31
|
+
# +try+ returns +nil+ when called on +nil+ regardless of whether it responds
|
32
|
+
# to the method:
|
33
|
+
#
|
34
|
+
# nil.try(:to_i) # => nil, rather than 0
|
35
|
+
#
|
36
|
+
# Arguments and blocks are forwarded to the method if invoked:
|
37
|
+
#
|
38
|
+
# @posts.try(:each_slice, 2) do |a, b|
|
39
|
+
# ...
|
40
|
+
# end
|
41
|
+
#
|
42
|
+
# The number of arguments in the signature must match. If the object responds
|
43
|
+
# to the method the call is attempted and +ArgumentError+ is still raised
|
44
|
+
# in case of argument mismatch.
|
45
|
+
#
|
46
|
+
# If +try+ is called without arguments it yields the receiver to a given
|
47
|
+
# block unless it is +nil+:
|
48
|
+
#
|
49
|
+
# @person.try do |p|
|
50
|
+
# ...
|
51
|
+
# end
|
52
|
+
#
|
53
|
+
# You can also call try with a block that accepts no arguments, and the block
|
54
|
+
# will be instance_eval'ed instead:
|
55
|
+
#
|
56
|
+
# @person.try { upcase.truncate(50) }
|
57
|
+
#
|
58
|
+
# Please also note that +try+ is defined on +Object+. Therefore, it won't work
|
59
|
+
# with instances of classes that do not have +Object+ among their ancestors,
|
60
|
+
# like direct subclasses of +BasicObject+. For example, using +try+ with
|
61
|
+
# +SimpleDelegator+ will delegate +try+ to the target instead of calling it on
|
62
|
+
# the delegator itself.
|
63
|
+
def try(*a, &b)
|
64
|
+
try!(*a, &b) if a.empty? || respond_to?(a.first)
|
65
|
+
end
|
66
|
+
|
67
|
+
# Same as #try, but will raise a NoMethodError exception if the receiver is not +nil+ and
|
68
|
+
# does not implement the tried method.
|
69
|
+
|
70
|
+
def try!(*a, &b)
|
71
|
+
if a.empty? && block_given?
|
72
|
+
if b.arity.zero?
|
73
|
+
instance_eval(&b)
|
74
|
+
else
|
75
|
+
yield self
|
76
|
+
end
|
77
|
+
else
|
78
|
+
public_send(*a, &b)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
class NilClass
|
84
|
+
# Calling +try+ on +nil+ always returns +nil+.
|
85
|
+
# It becomes especially helpful when navigating through associations that may return +nil+.
|
86
|
+
#
|
87
|
+
# nil.try(:name) # => nil
|
88
|
+
#
|
89
|
+
# Without +try+
|
90
|
+
# @person && @person.children.any? && @person.children.first.name
|
91
|
+
#
|
92
|
+
# With +try+
|
93
|
+
# @person.try(:children).try(:first).try(:name)
|
94
|
+
def try(*args)
|
95
|
+
nil
|
96
|
+
end
|
97
|
+
|
98
|
+
def try!(*args)
|
99
|
+
nil
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
data/lib/yt_util/version.rb
CHANGED
data/lib/yt_util.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yt_util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel P. Clark
|
@@ -96,6 +96,7 @@ files:
|
|
96
96
|
- Rakefile
|
97
97
|
- lib/yt_util.rb
|
98
98
|
- lib/yt_util/scrape.rb
|
99
|
+
- lib/yt_util/try.rb
|
99
100
|
- lib/yt_util/url.rb
|
100
101
|
- lib/yt_util/version.rb
|
101
102
|
- spec/spec_helper.rb
|