youtube_parser 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +15 -3
- data/lib/youtube_parser.rb +1 -0
- data/lib/youtube_parser/channel.rb +10 -0
- data/lib/youtube_parser/channels/videos_section.rb +9 -2
- data/lib/youtube_parser/client.rb +2 -0
- data/lib/youtube_parser/config/keys.yml +12 -1
- data/lib/youtube_parser/resource.rb +4 -0
- data/lib/youtube_parser/search/channels.rb +87 -0
- data/lib/youtube_parser/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6d80fe7f501434dd15d6eb673f3ceb0eee764c81ff5d73a2916890400544bdf4
|
4
|
+
data.tar.gz: 423630f4671d05371789bdd06eab593f3eb30bf7d25fff7d67855fa6d2c65888
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8c90751d67f0ecb0493bb8a204dac01dade93b3deb62e726c4ce6597fa956e9c95478e04ab260c84f20a3d121c1c145f5696cbc2874229a394d636fe9244c14c
|
7
|
+
data.tar.gz: 115d0da5107925986761490852856e452f3e519070a68223538de049e7f4c469aea668f580fcc4ef20da0db62dbc0347ba6dca7494fa830adad82a7d48be7646
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -29,11 +29,23 @@ options = { channel_url: 'https://www.youtube.com/channel/channel_id',
|
|
29
29
|
YoutubeParser::Channel.new(options).info
|
30
30
|
```
|
31
31
|
|
32
|
-
|
32
|
+
To search channels by query:
|
33
|
+
```ruby
|
34
|
+
require 'youtube_parser'
|
33
35
|
|
34
|
-
|
36
|
+
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 '\
|
37
|
+
'(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
|
38
|
+
options = { search_query: 'search query',
|
39
|
+
user_agent: user_agent }
|
40
|
+
|
41
|
+
# Search with pagination
|
42
|
+
YoutubeParser::Search::Channels.new(options).for_each_channel do |channel|
|
43
|
+
channel
|
44
|
+
end
|
35
45
|
|
36
|
-
|
46
|
+
# First page channels
|
47
|
+
YoutubeParser::Search::Channels.new(options).info
|
48
|
+
```
|
37
49
|
|
38
50
|
## Contributing
|
39
51
|
|
data/lib/youtube_parser.rb
CHANGED
@@ -15,6 +15,7 @@ require 'youtube_parser/base_parser'
|
|
15
15
|
require 'youtube_parser/channel'
|
16
16
|
require 'youtube_parser/channels/about_section'
|
17
17
|
require 'youtube_parser/channels/videos_section'
|
18
|
+
require 'youtube_parser/search/channels'
|
18
19
|
|
19
20
|
module YoutubeParser
|
20
21
|
def self.root
|
@@ -12,6 +12,7 @@ module YoutubeParser
|
|
12
12
|
|
13
13
|
# Merges the about-section and videos-section info, adds the channel URL and
# drops every entry whose value is not `present?`.
#
# @return [Hash] the remaining entries (always a Hash, never nil)
def collect_channel_info
  data = about_section_info.merge(video_section_info)
  data[:channel_url] = channel_url
  # Non-bang `select` on purpose: `Hash#select!` returns nil when nothing was
  # removed, which made this method return nil whenever every value was present.
  data.select { |_, v| v.present? }
end
|
17
18
|
|
@@ -23,6 +24,15 @@ module YoutubeParser
|
|
23
24
|
@video_section_info ||= section(:videos).info
|
24
25
|
end
|
25
26
|
|
27
|
+
def channel_url
|
28
|
+
uri = URI(client.class::BASE_URL)
|
29
|
+
uri.path = options.channel_url
|
30
|
+
|
31
|
+
uri.to_s
|
32
|
+
rescue URI::InvalidComponentError
|
33
|
+
options.channel_url
|
34
|
+
end
|
35
|
+
|
26
36
|
def section(title)
|
27
37
|
constant_name = "YoutubeParser::Channels::#{title.capitalize}Section"
|
28
38
|
opts = { channel_url: options.channel_url, client: client }
|
@@ -18,13 +18,16 @@ module YoutubeParser
|
|
18
18
|
return @video_ids if @video_ids&.any?
|
19
19
|
|
20
20
|
sections.each do |section|
|
21
|
-
videos = section.dig(*keys.video_section_tabs)
|
21
|
+
videos = section.dig(*keys.video_section_tabs) ||
|
22
|
+
section.dig(*keys.second_video_section)
|
22
23
|
next unless videos.is_a? Array
|
23
24
|
|
24
|
-
@video_ids = videos
|
25
|
+
@video_ids = scrape_video_ids videos
|
25
26
|
|
26
27
|
return @video_ids if @video_ids.any?
|
27
28
|
end
|
29
|
+
|
30
|
+
[]
|
28
31
|
end
|
29
32
|
|
30
33
|
def playlist_id
|
@@ -34,6 +37,10 @@ module YoutubeParser
|
|
34
37
|
@playlist_id = section&.dig(*keys.playlist_id)
|
35
38
|
end
|
36
39
|
|
40
|
+
def scrape_video_ids(videos)
|
41
|
+
videos.map { |video| video.dig(*keys.video_ids) }.compact
|
42
|
+
end
|
43
|
+
|
37
44
|
def sections
|
38
45
|
@sections ||= json.dig(*keys.section_tabs) || []
|
39
46
|
end
|
@@ -11,6 +11,9 @@ playlist_id: ['tabRenderer', 'content', 'sectionListRenderer', 'subMenu',
|
|
11
11
|
'channelSubMenuRenderer', 'playAllButton', 'buttonRenderer',
|
12
12
|
'navigationEndpoint', 'watchPlaylistEndpoint', 'playlistId']
|
13
13
|
video_ids: ['gridVideoRenderer', 'videoId']
|
14
|
+
second_video_section: ['tabRenderer', 'content', 'sectionListRenderer', 'contents',
|
15
|
+
0, 'itemSectionRenderer', 'contents', 0, 'shelfRenderer', 'content',
|
16
|
+
'horizontalListRenderer', 'items']
|
14
17
|
country: ['country', 'simpleText']
|
15
18
|
description_first: ['description', 'simpleText']
|
16
19
|
description_second: ['artistBio', 'simpleText']
|
@@ -21,4 +24,12 @@ views: ['viewCountText', 'runs']
|
|
21
24
|
keywords: ['metadata', 'channelMetadataRenderer', 'keywords']
|
22
25
|
tab_renderer: ['tabRenderer']
|
23
26
|
title: ['title', 'simpleText']
|
24
|
-
statistics: ['networkStatistics', 'statistics']
|
27
|
+
statistics: ['networkStatistics', 'statistics']
|
28
|
+
channels_page: ['contents', 'twoColumnSearchResultsRenderer', 'primaryContents',
|
29
|
+
'sectionListRenderer', 'contents']
|
30
|
+
continuation_contents: ['continuationContents', 'itemSectionContinuation']
|
31
|
+
channels_contents: ['itemSectionRenderer', 'contents']
|
32
|
+
channel_renderer: ['channelRenderer']
|
33
|
+
continuations: ['continuations']
|
34
|
+
renderer_continuations: ['itemSectionRenderer', 'continuations']
|
35
|
+
continuation: ['nextContinuationData', 'continuation']
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module YoutubeParser
  module Search
    # Searches for channels matching the `search_query` option and builds the
    # info of every hit through YoutubeParser::Channel.
    class Channels < BaseParser
      # Extra query parameter merged into every search request.
      # NOTE(review): presumably YouTube's "channels only" result filter — confirm.
      DEFAULT_PARAMS = { sp: 'EgIQAg%3D%3D' }.freeze

      options :search_query

      # Channel info for the first page of search results only.
      #
      # @return [Array] one YoutubeParser::Channel#info result per channel found
      def info
        first_page = channels(search)
        (first_page[:channels] || []).map { |channel| channel_info(channel) }
      end

      # Walks every page of search results and yields the info of each channel.
      #
      # @yield [info] the YoutubeParser::Channel#info result of one channel
      # @return [Enumerator] when called without a block
      def for_each_channel
        return enum_for(:for_each_channel) unless block_given?

        for_each_page do |renderers|
          renderers.each { |renderer| yield channel_info(renderer) }
        end
      end

      private

      # Yields the channel renderers of each result page, following the
      # continuation token until a page has none or yields no channels.
      def for_each_page
        # Named `page` so the local does not shadow the #channels method.
        page = channels(search)
        return if page.empty? || page[:channels].empty?

        loop do
          yield page[:channels]

          break unless page[:continuation]

          page = channels(search(ctoken: page[:continuation]))

          break if page.empty?
        end
      end

      # Requests the 'results' endpoint with the given options merged with the
      # search params; on key clashes the values from #params win.
      def search(opts = {})
        client.get('results', opts.merge(params))
      end

      # Extracts the channel renderers and the next continuation token from a
      # response.
      #
      # @param json [Hash] parsed response, either a first page (located via
      #   keys.channels_page) or a continuation page (keys.continuation_contents)
      # @return [Hash] `{ channels: [...], continuation: token_or_nil }` for the
      #   first section that contains channels, or `{}` when there is none
      def channels(json)
        page_contents = json.dig(*keys.channels_page)
        continuation_contents = json.dig(*keys.continuation_contents)
        return {} if page_contents.nil? && continuation_contents.nil?

        # A continuation page carries a single list of items, so it is wrapped
        # to be iterated like the sections of a first page.
        contents = page_contents || [continuation_contents&.dig('contents')]
        contents.each do |content|
          found = scrape_channels(content)
          next unless found.any?

          return {
            channels: found,
            continuation: continuation(continuation_contents, content)
          }
        end

        {}
      end

      # Finds the token of the next page.
      #
      # @param contents [Hash, nil] continuation payload of a continuation page
      # @param content [Hash, Array, nil] the section the channels came from
      # @return [Object, nil] first non-nil continuation token, if any
      def continuation(contents, content)
        # On continuation pages `content` is an Array of items (or nil), and
        # digging into it with String keys raises. The token then lives on the
        # continuation payload, so `content` is consulted only for first-page
        # sections, which are Hashes.
        continuations =
          if contents
            contents.dig(*keys.continuations)
          elsif content.is_a?(Hash)
            content.dig(*keys.renderer_continuations)
          end

        continuations&.map { |cont| cont.dig(*keys.continuation) }&.compact&.first
      end

      # Collects the channel renderers of a section (Hash) or of an already
      # unwrapped list of items; returns [] when there is nothing to scan.
      def scrape_channels(content)
        contents = content.is_a?(Hash) ? content.dig(*keys.channels_contents) : content
        contents&.map { |c| c.dig(*keys.channel_renderer) }&.compact || []
      end

      # Builds the info of one channel renderer via YoutubeParser::Channel,
      # reusing this parser's client.
      def channel_info(channel)
        channel_url = channel.dig(*keys.channel_url)
        opts = { channel_url: channel_url, client: client }

        YoutubeParser::Channel.new(opts).info
      end

      # Query parameters of a search request: the required options plus
      # DEFAULT_PARAMS.
      def params
        options.attributes.slice(*required_options).merge(DEFAULT_PARAMS)
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: youtube_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- o.vykhor
|
@@ -107,6 +107,7 @@ files:
|
|
107
107
|
- lib/youtube_parser/client.rb
|
108
108
|
- lib/youtube_parser/config/keys.yml
|
109
109
|
- lib/youtube_parser/resource.rb
|
110
|
+
- lib/youtube_parser/search/channels.rb
|
110
111
|
- lib/youtube_parser/version.rb
|
111
112
|
- youtube_parser.gemspec
|
112
113
|
homepage: https://github.com/oleksiivykhor/youtube_parser
|