youtube_parser 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +15 -3
- data/lib/youtube_parser.rb +1 -0
- data/lib/youtube_parser/channel.rb +10 -0
- data/lib/youtube_parser/channels/videos_section.rb +9 -2
- data/lib/youtube_parser/client.rb +2 -0
- data/lib/youtube_parser/config/keys.yml +12 -1
- data/lib/youtube_parser/resource.rb +4 -0
- data/lib/youtube_parser/search/channels.rb +87 -0
- data/lib/youtube_parser/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6d80fe7f501434dd15d6eb673f3ceb0eee764c81ff5d73a2916890400544bdf4
|
4
|
+
data.tar.gz: 423630f4671d05371789bdd06eab593f3eb30bf7d25fff7d67855fa6d2c65888
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8c90751d67f0ecb0493bb8a204dac01dade93b3deb62e726c4ce6597fa956e9c95478e04ab260c84f20a3d121c1c145f5696cbc2874229a394d636fe9244c14c
|
7
|
+
data.tar.gz: 115d0da5107925986761490852856e452f3e519070a68223538de049e7f4c469aea668f580fcc4ef20da0db62dbc0347ba6dca7494fa830adad82a7d48be7646
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -29,11 +29,23 @@ options = { channel_url: 'https://www.youtube.com/channel/channel_id',
|
|
29
29
|
YoutubeParser::Channel.new(options).info
|
30
30
|
```
|
31
31
|
|
32
|
-
|
32
|
+
To search channels by query:
|
33
|
+
```ruby
|
34
|
+
require 'youtube_parser'
|
33
35
|
|
34
|
-
|
36
|
+
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 '\
|
37
|
+
'(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
|
38
|
+
options = { search_query: 'search query',
|
39
|
+
user_agent: user_agent }
|
40
|
+
|
41
|
+
# Search with pagination
|
42
|
+
YoutubeParser::Search::Channels.new(options).for_each_channel do |channel|
|
43
|
+
channel
|
44
|
+
end
|
35
45
|
|
36
|
-
|
46
|
+
# First page channels
|
47
|
+
YoutubeParser::Search::Channels.new(options).info
|
48
|
+
```
|
37
49
|
|
38
50
|
## Contributing
|
39
51
|
|
data/lib/youtube_parser.rb
CHANGED
@@ -15,6 +15,7 @@ require 'youtube_parser/base_parser'
|
|
15
15
|
require 'youtube_parser/channel'
|
16
16
|
require 'youtube_parser/channels/about_section'
|
17
17
|
require 'youtube_parser/channels/videos_section'
|
18
|
+
require 'youtube_parser/search/channels'
|
18
19
|
|
19
20
|
module YoutubeParser
|
20
21
|
def self.root
|
@@ -12,6 +12,7 @@ module YoutubeParser
|
|
12
12
|
|
13
13
|
def collect_channel_info
|
14
14
|
data = about_section_info.merge(video_section_info)
|
15
|
+
data[:channel_url] = channel_url
|
15
16
|
data.select! { |_, v| v.present? }
|
16
17
|
end
|
17
18
|
|
@@ -23,6 +24,15 @@ module YoutubeParser
|
|
23
24
|
@video_section_info ||= section(:videos).info
|
24
25
|
end
|
25
26
|
|
27
|
+
def channel_url
|
28
|
+
uri = URI(client.class::BASE_URL)
|
29
|
+
uri.path = options.channel_url
|
30
|
+
|
31
|
+
uri.to_s
|
32
|
+
rescue URI::InvalidComponentError
|
33
|
+
options.channel_url
|
34
|
+
end
|
35
|
+
|
26
36
|
def section(title)
|
27
37
|
constant_name = "YoutubeParser::Channels::#{title.capitalize}Section"
|
28
38
|
opts = { channel_url: options.channel_url, client: client }
|
@@ -18,13 +18,16 @@ module YoutubeParser
|
|
18
18
|
return @video_ids if @video_ids&.any?
|
19
19
|
|
20
20
|
sections.each do |section|
|
21
|
-
videos = section.dig(*keys.video_section_tabs)
|
21
|
+
videos = section.dig(*keys.video_section_tabs) ||
|
22
|
+
section.dig(*keys.second_video_section)
|
22
23
|
next unless videos.is_a? Array
|
23
24
|
|
24
|
-
@video_ids = videos
|
25
|
+
@video_ids = scrape_video_ids videos
|
25
26
|
|
26
27
|
return @video_ids if @video_ids.any?
|
27
28
|
end
|
29
|
+
|
30
|
+
[]
|
28
31
|
end
|
29
32
|
|
30
33
|
def playlist_id
|
@@ -34,6 +37,10 @@ module YoutubeParser
|
|
34
37
|
@playlist_id = section&.dig(*keys.playlist_id)
|
35
38
|
end
|
36
39
|
|
40
|
+
def scrape_video_ids(videos)
|
41
|
+
videos.map { |video| video.dig(*keys.video_ids) }.compact
|
42
|
+
end
|
43
|
+
|
37
44
|
def sections
|
38
45
|
@sections ||= json.dig(*keys.section_tabs) || []
|
39
46
|
end
|
@@ -11,6 +11,9 @@ playlist_id: ['tabRenderer', 'content', 'sectionListRenderer', 'subMenu',
|
|
11
11
|
'channelSubMenuRenderer', 'playAllButton', 'buttonRenderer',
|
12
12
|
'navigationEndpoint', 'watchPlaylistEndpoint', 'playlistId']
|
13
13
|
video_ids: ['gridVideoRenderer', 'videoId']
|
14
|
+
second_video_section: ['tabRenderer', 'content', 'sectionListRenderer', 'contents',
|
15
|
+
0, 'itemSectionRenderer', 'contents', 0, 'shelfRenderer', 'content',
|
16
|
+
'horizontalListRenderer', 'items']
|
14
17
|
country: ['country', 'simpleText']
|
15
18
|
description_first: ['description', 'simpleText']
|
16
19
|
description_second: ['artistBio', 'simpleText']
|
@@ -21,4 +24,12 @@ views: ['viewCountText', 'runs']
|
|
21
24
|
keywords: ['metadata', 'channelMetadataRenderer', 'keywords']
|
22
25
|
tab_renderer: ['tabRenderer']
|
23
26
|
title: ['title', 'simpleText']
|
24
|
-
statistics: ['networkStatistics', 'statistics']
|
27
|
+
statistics: ['networkStatistics', 'statistics']
|
28
|
+
channels_page: ['contents', 'twoColumnSearchResultsRenderer', 'primaryContents',
|
29
|
+
'sectionListRenderer', 'contents']
|
30
|
+
continuation_contents: ['continuationContents', 'itemSectionContinuation']
|
31
|
+
channels_contents: ['itemSectionRenderer', 'contents']
|
32
|
+
channel_renderer: ['channelRenderer']
|
33
|
+
continuations: ['continuations']
|
34
|
+
renderer_continuations: ['itemSectionRenderer', 'continuations']
|
35
|
+
continuation: ['nextContinuationData', 'continuation']
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module YoutubeParser
|
4
|
+
module Search
|
5
|
+
class Channels < BaseParser
|
6
|
+
DEFAULT_PARAMS = { sp: 'EgIQAg%3D%3D' }.freeze
|
7
|
+
|
8
|
+
options :search_query
|
9
|
+
|
10
|
+
def info
|
11
|
+
(channels(search)[:channels] || []).map { |c| channel_info c }
|
12
|
+
end
|
13
|
+
|
14
|
+
def for_each_channel
|
15
|
+
for_each_page do |channels|
|
16
|
+
channels.each { |channel| yield channel_info channel }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def for_each_page
|
23
|
+
channels = channels(search)
|
24
|
+
return if channels.empty? || channels[:channels].empty?
|
25
|
+
|
26
|
+
loop do
|
27
|
+
yield channels[:channels]
|
28
|
+
|
29
|
+
break unless channels[:continuation]
|
30
|
+
|
31
|
+
channels = channels(search(ctoken: channels[:continuation]))
|
32
|
+
|
33
|
+
break if channels.empty?
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def search(opts = {})
|
38
|
+
opts = opts.merge(params)
|
39
|
+
client.get('results', opts)
|
40
|
+
end
|
41
|
+
|
42
|
+
def channels(json)
|
43
|
+
page_contents = json.dig(*keys.channels_page)
|
44
|
+
continuation_contents = json.dig(*keys.continuation_contents)
|
45
|
+
return {} if page_contents.nil? && continuation_contents.nil?
|
46
|
+
|
47
|
+
contents = (page_contents || [continuation_contents&.dig('contents')])
|
48
|
+
contents.each do |content|
|
49
|
+
channels = scrape_channels content
|
50
|
+
channels_hash = {
|
51
|
+
channels: channels,
|
52
|
+
continuation: continuation(continuation_contents, content)
|
53
|
+
}
|
54
|
+
|
55
|
+
return channels_hash if channels.any?
|
56
|
+
end
|
57
|
+
|
58
|
+
{}
|
59
|
+
end
|
60
|
+
|
61
|
+
def continuation(contents, content)
|
62
|
+
continuations = content.dig(*keys.renderer_continuations)
|
63
|
+
continuations = contents&.dig(*keys.continuations) if contents
|
64
|
+
continuations&.map do |cont|
|
65
|
+
cont.dig(*keys.continuation)
|
66
|
+
end&.compact&.first
|
67
|
+
end
|
68
|
+
|
69
|
+
def scrape_channels(content)
|
70
|
+
contents = content
|
71
|
+
contents = content.dig(*keys.channels_contents) if content.is_a? Hash
|
72
|
+
contents&.map { |c| c.dig(*keys.channel_renderer) }&.compact || []
|
73
|
+
end
|
74
|
+
|
75
|
+
def channel_info(channel)
|
76
|
+
channel_url = channel.dig(*keys.channel_url)
|
77
|
+
opts = { channel_url: channel_url, client: client }
|
78
|
+
|
79
|
+
YoutubeParser::Channel.new(opts).info
|
80
|
+
end
|
81
|
+
|
82
|
+
def params
|
83
|
+
options.attributes.slice(*required_options).merge(DEFAULT_PARAMS)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: youtube_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- o.vykhor
|
@@ -107,6 +107,7 @@ files:
|
|
107
107
|
- lib/youtube_parser/client.rb
|
108
108
|
- lib/youtube_parser/config/keys.yml
|
109
109
|
- lib/youtube_parser/resource.rb
|
110
|
+
- lib/youtube_parser/search/channels.rb
|
110
111
|
- lib/youtube_parser/version.rb
|
111
112
|
- youtube_parser.gemspec
|
112
113
|
homepage: https://github.com/oleksiivykhor/youtube_parser
|