yt_util 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/yt_util/scrape.rb +69 -40
- data/lib/yt_util/try.rb +102 -0
- data/lib/yt_util/version.rb +1 -1
- data/lib/yt_util.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e7b1ed94c177fb7d05f71eae24e87db5467d01c1
|
4
|
+
data.tar.gz: 487ea160d323c3b8ef432d91523d2afbc1d44a0b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d529e7cdf11e6b2f4b5e844eddceec0886213894fa87aa70c642042a325a58ac20684c536cb696a5e2ca4111e17c194294a4ead6692f2d940dd16899b5aecf0c
|
7
|
+
data.tar.gz: cfa6c69365757262e89dd62ee777c9a7dcef6eede3093393eec9ada9a3b0dd34270d2a9fdadcdc5d656f48af51b22b7db7cc3ec3fa3ac8523eb8593b19808669
|
data/lib/yt_util/scrape.rb
CHANGED
@@ -5,69 +5,98 @@ module YtUtil
|
|
5
5
|
# OR range etc https://support.google.com/websearch/answer/136861?hl=en
|
6
6
|
# punctuation: https://support.google.com/websearch/answer/2466433
|
7
7
|
|
8
|
-
def self.
|
9
|
-
|
10
|
-
|
11
|
-
get("https://www.youtube.com/results?search_query=#{Addressable::URI.parse(search).normalize + filters}")
|
12
|
-
parse_query(results.parser)
|
8
|
+
def self.raw_query(search, filters = "")
|
9
|
+
raise "Invalid object type" unless search.is_a? String and filters.is_a? String
|
10
|
+
request_query(search,filters)
|
13
11
|
end
|
14
12
|
|
15
|
-
def self.
|
16
|
-
result =
|
17
|
-
|
13
|
+
def self.query(search = nil, filters = "", &qry)
|
14
|
+
result = qry.try(:call) || raw_query(search,filters)
|
15
|
+
parse_query(result)
|
18
16
|
end
|
19
17
|
|
20
|
-
def self.
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
18
|
+
def self.raw_video_stats(video_code)
|
19
|
+
raise "Invalid object type" unless video_code.is_a? String
|
20
|
+
raise "Invalid video code" unless video_code.length == 11
|
21
|
+
request_video_stats(video_code)
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.video_stats(video_code = nil, &qry)
|
25
|
+
result = qry.try(:call) || raw_video_stats(video_code)
|
26
|
+
parse_video_page(video_code, result)
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.raw_user_stats(username)
|
30
|
+
raise "Invalid object type" unless username.is_a? String
|
31
|
+
request_user_stats(username)
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.user_stats(username = nil, &qry)
|
35
|
+
result = qry.try(:call) || raw_user_stats(username)
|
36
|
+
parse_user(result)
|
25
37
|
end
|
26
38
|
|
27
39
|
private
|
40
|
+
def self.request(web_request)
|
41
|
+
try { Mechanize.new.tap { |i| i.follow_meta_refresh = true }.get(web_request).parser } ||
|
42
|
+
Nokogiri::HTML(open(web_request))
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.request_query(search, filters = "")
|
46
|
+
web_request = "https://www.youtube.com/results?search_query=#{Addressable::URI.parse(search).normalize + filters}"
|
47
|
+
request(web_request)
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.request_video_stats(video_code)
|
51
|
+
web_request = "https://www.youtube.com/watch?v=#{video_code}"
|
52
|
+
request(web_request)
|
53
|
+
end
|
54
|
+
|
55
|
+
def self.request_user_stats(username)
|
56
|
+
web_request = "https://www.youtube.com/user/#{username}/about"
|
57
|
+
request(web_request)
|
58
|
+
end
|
59
|
+
|
28
60
|
def self.parse_query(query_result)
|
29
61
|
query_result.css("ol.item-section > li")[1..-1].map do |result|
|
30
62
|
{
|
31
|
-
title: result.css("div:nth-child(1)").css("div:nth-child(2)").css("h3").text,
|
32
|
-
video: result.css("div:nth-child(1)").css("div:nth-child(2)").css("h3 > a").first[:href].dup.tap{|i|i.replace i[(i.index("=").to_i+1)..-1]},
|
33
|
-
views:
|
34
|
-
new: !!result.css("div:nth-child(1)").css("div:nth-child(2)").css("div:nth-child(4)").css("ul:nth-child(1)").text["New"],
|
35
|
-
hd: !!result.css("div:nth-child(1)").css("div:nth-child(2)").css("div:nth-child(4)").css("ul:nth-child(1)").text["HD"],
|
36
|
-
description: result.css("div:nth-child(1)").css("div:nth-child(2)").css("
|
37
|
-
length: result.css("div:nth-child(1)").css("div:nth-child(1)").css("a:nth-child(1)").css("span:nth-child(2)").text
|
63
|
+
title: try {result.css("div:nth-child(1)").css("div:nth-child(2)").css("h3").text},
|
64
|
+
video: try {result.css("div:nth-child(1)").css("div:nth-child(2)").css("h3 > a").first[:href].dup.tap{|i|i.replace i[(i.index("=").to_i+1)..-1]}},
|
65
|
+
views: try {result.css('li').select {|i| i.text =~ /^[\d,]{1,} views/ }.first.text.split.first.gsub(",","_").to_i},
|
66
|
+
new: try {!!result.css("div:nth-child(1)").css("div:nth-child(2)").css("div:nth-child(4)").css("ul:nth-child(1)").text["New"]},
|
67
|
+
hd: try {!!result.css("div:nth-child(1)").css("div:nth-child(2)").css("div:nth-child(4)").css("ul:nth-child(1)").text["HD"]},
|
68
|
+
description: try {result.css("div:nth-child(1)").css("div:nth-child(2)").css(".yt-lockup-description").text},
|
69
|
+
length: try {result.css("div:nth-child(1)").css("div:nth-child(1)").css("a:nth-child(1)").css("span:nth-child(2)").text}
|
38
70
|
}
|
39
71
|
end
|
40
72
|
end
|
41
73
|
|
42
|
-
def self.parse_video_page(query_result)
|
74
|
+
def self.parse_video_page(video_code, query_result)
|
43
75
|
{
|
44
|
-
video: video_code,
|
45
|
-
user_name: query_result.css('a.
|
46
|
-
description: query_result.css('p#eow-description').text,
|
47
|
-
category: query_result.css('
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
dislikes: String(/(\d+)/.match(query_result.css('button#watch-dislike').text.strip.gsub(',', ''))).to_i*1000,
|
54
|
-
published: query_result.css('strong.watch-time-text').text[13..-1],
|
55
|
-
license: query_result.css('li.watch-meta-item:nth-child(2)').text.gsub("\n", '').strip.tap {|i| i.replace i[i.index(' ').to_i..-1].strip}
|
76
|
+
video: video_code || try {query_result.css('meta').select {|i| i.attributes["property"].try(:value) =~ /og:url/}.first["content"].match(/\?v=(.+)/)[1]},
|
77
|
+
user_name: try {query_result.css('li').css('a').select{|i| i.text =~ / by [a-z0-9]{1,}/i}.map {|i| i.text.match(/by ([a-z0-9]{1,})/i)[1]}.first},
|
78
|
+
description: try {query_result.css('p#eow-description').text},
|
79
|
+
category: try {query_result.css('.watch-meta-item').css('a').text},
|
80
|
+
views: try {String(/(\d+)/.match(query_result.css('div.watch-view-count').text.strip.gsub(',', ''))).to_i},
|
81
|
+
likes: try {String(/(\d+)/.match(query_result.css('button#watch-like').text.strip.gsub(',', ''))).to_i},
|
82
|
+
dislikes: try {String(/(\d+)/.match(query_result.css('button#watch-dislike').text.strip.gsub(',', ''))).to_i},
|
83
|
+
published: try {query_result.css('strong.watch-time-text').text.match(/ on ([a-z0-9, ]{11,})/i)[1]},
|
84
|
+
license: try {query_result.css('li.watch-meta-item:nth-child(2)').text.gsub("\n", '').strip.tap {|i| i.replace i[i.index(' ').to_i..-1].strip}}
|
56
85
|
}
|
57
86
|
end
|
58
87
|
|
59
88
|
def self.parse_user(query_result)
|
60
|
-
views_n_subs = query_result.css('.about-stats').
|
89
|
+
views_n_subs = try {query_result.css('.about-stats').
|
61
90
|
css('li').take(2).map{|i| i = i.text.strip; {
|
62
91
|
i.match(/[a-z]+/)[0] => i.match(/[\d,]+/)[0]}
|
63
|
-
}.inject(:update)
|
92
|
+
}.inject(:update)}
|
64
93
|
|
65
94
|
{
|
66
|
-
description: query_result.css('.about-description').css('p').text,
|
67
|
-
link: query_result.css('a[title="Google+"]')[0]["href"],
|
68
|
-
views: views_n_subs["views"],
|
69
|
-
subscribers: views_n_subs["subscribers"],
|
70
|
-
joined: query_result.css('.about-stats').css('.joined-date').text.strip
|
95
|
+
description: try {query_result.css('.about-description').css('p').text},
|
96
|
+
link: try {query_result.css('a[title="Google+"]')[0]["href"]},
|
97
|
+
views: try {views_n_subs["views"]},
|
98
|
+
subscribers: try {views_n_subs["subscribers"]},
|
99
|
+
joined: try {query_result.css('.about-stats').css('.joined-date').text.strip}
|
71
100
|
}
|
72
101
|
end
|
73
102
|
|
data/lib/yt_util/try.rb
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
unless defined? try
|
2
|
+
class Object
|
3
|
+
# Invokes the public method whose name goes as first argument just like
|
4
|
+
# +public_send+ does, except that if the receiver does not respond to it the
|
5
|
+
# call returns +nil+ rather than raising an exception.
|
6
|
+
#
|
7
|
+
# This method is defined to be able to write
|
8
|
+
#
|
9
|
+
# @person.try(:name)
|
10
|
+
#
|
11
|
+
# instead of
|
12
|
+
#
|
13
|
+
# @person.name if @person
|
14
|
+
#
|
15
|
+
# +try+ calls can be chained:
|
16
|
+
#
|
17
|
+
# @person.try(:spouse).try(:name)
|
18
|
+
#
|
19
|
+
# instead of
|
20
|
+
#
|
21
|
+
# @person.spouse.name if @person && @person.spouse
|
22
|
+
#
|
23
|
+
# +try+ will also return +nil+ if the receiver does not respond to the method:
|
24
|
+
#
|
25
|
+
# @person.try(:non_existing_method) # => nil
|
26
|
+
#
|
27
|
+
# instead of
|
28
|
+
#
|
29
|
+
# @person.non_existing_method if @person.respond_to?(:non_existing_method) # => nil
|
30
|
+
#
|
31
|
+
# +try+ returns +nil+ when called on +nil+ regardless of whether it responds
|
32
|
+
# to the method:
|
33
|
+
#
|
34
|
+
# nil.try(:to_i) # => nil, rather than 0
|
35
|
+
#
|
36
|
+
# Arguments and blocks are forwarded to the method if invoked:
|
37
|
+
#
|
38
|
+
# @posts.try(:each_slice, 2) do |a, b|
|
39
|
+
# ...
|
40
|
+
# end
|
41
|
+
#
|
42
|
+
# The number of arguments in the signature must match. If the object responds
|
43
|
+
# to the method the call is attempted and +ArgumentError+ is still raised
|
44
|
+
# in case of argument mismatch.
|
45
|
+
#
|
46
|
+
# If +try+ is called without arguments it yields the receiver to a given
|
47
|
+
# block unless it is +nil+:
|
48
|
+
#
|
49
|
+
# @person.try do |p|
|
50
|
+
# ...
|
51
|
+
# end
|
52
|
+
#
|
53
|
+
# You can also call try with a block without accepting an argument, and the block
|
54
|
+
# will be instance_eval'ed instead:
|
55
|
+
#
|
56
|
+
# @person.try { upcase.truncate(50) }
|
57
|
+
#
|
58
|
+
# Please also note that +try+ is defined on +Object+. Therefore, it won't work
|
59
|
+
# with instances of classes that do not have +Object+ among their ancestors,
|
60
|
+
# like direct subclasses of +BasicObject+. For example, using +try+ with
|
61
|
+
# +SimpleDelegator+ will delegate +try+ to the target instead of calling it on
|
62
|
+
# the delegator itself.
|
63
|
+
def try(*a, &b)
|
64
|
+
try!(*a, &b) if a.empty? || respond_to?(a.first)
|
65
|
+
end
|
66
|
+
|
67
|
+
# Same as #try, but will raise a NoMethodError exception if the receiver is not +nil+ and
|
68
|
+
# does not implement the tried method.
|
69
|
+
|
70
|
+
def try!(*a, &b)
|
71
|
+
if a.empty? && block_given?
|
72
|
+
if b.arity.zero?
|
73
|
+
instance_eval(&b)
|
74
|
+
else
|
75
|
+
yield self
|
76
|
+
end
|
77
|
+
else
|
78
|
+
public_send(*a, &b)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
class NilClass
|
84
|
+
# Calling +try+ on +nil+ always returns +nil+.
|
85
|
+
# It becomes especially helpful when navigating through associations that may return +nil+.
|
86
|
+
#
|
87
|
+
# nil.try(:name) # => nil
|
88
|
+
#
|
89
|
+
# Without +try+
|
90
|
+
# @person && @person.children.any? && @person.children.first.name
|
91
|
+
#
|
92
|
+
# With +try+
|
93
|
+
# @person.try(:children).try(:first).try(:name)
|
94
|
+
def try(*args)
|
95
|
+
nil
|
96
|
+
end
|
97
|
+
|
98
|
+
def try!(*args)
|
99
|
+
nil
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
data/lib/yt_util/version.rb
CHANGED
data/lib/yt_util.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yt_util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel P. Clark
|
@@ -96,6 +96,7 @@ files:
|
|
96
96
|
- Rakefile
|
97
97
|
- lib/yt_util.rb
|
98
98
|
- lib/yt_util/scrape.rb
|
99
|
+
- lib/yt_util/try.rb
|
99
100
|
- lib/yt_util/url.rb
|
100
101
|
- lib/yt_util/version.rb
|
101
102
|
- spec/spec_helper.rb
|