youtube_parser 0.1.2 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +4 -1
- data/README.md +15 -3
- data/lib/youtube_parser.rb +6 -3
- data/lib/youtube_parser/channel.rb +10 -0
- data/lib/youtube_parser/channels/about_section.rb +27 -3
- data/lib/youtube_parser/channels/videos_section.rb +9 -2
- data/lib/youtube_parser/client.rb +2 -0
- data/lib/youtube_parser/config/keys.yml +14 -1
- data/lib/youtube_parser/resource.rb +4 -0
- data/lib/youtube_parser/search/channels.rb +87 -0
- data/lib/youtube_parser/version.rb +1 -1
- data/youtube_parser.gemspec +3 -0
- metadata +45 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e7896f15e739e7d88e6de6acf67f07bc543897deffebe0c4e723a3746d84e32
|
4
|
+
data.tar.gz: bff5544c0e250bd76c9265f74eaee6d743423c7748030415d0983175b3c263ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b6ae178f21036ff1f620982349975cd88973aa51443a59caf8efa81a0da5a4b1bbd135f00c383f0e3c672a1edd63c74d12219c346f364066bbc952f8f59a525
|
7
|
+
data.tar.gz: 2b30490336766753b5b18aa7f0bdc5f61eea7ac6dd13c97777466c2ec5f2f25c8df3bde616152243811882b8c2c656625a50c68989cb80f51c41ff1a2939428e
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -29,11 +29,23 @@ options = { channel_url: 'https://www.youtube.com/channel/channel_id',
|
|
29
29
|
YoutubeParser::Channel.new(options).info
|
30
30
|
```
|
31
31
|
|
32
|
-
|
32
|
+
To search channels by query:
|
33
|
+
```ruby
|
34
|
+
require 'youtube_parser'
|
33
35
|
|
34
|
-
|
36
|
+
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 '\
|
37
|
+
'(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
|
38
|
+
options = { search_query: 'search query',
|
39
|
+
user_agent: user_agent }
|
40
|
+
|
41
|
+
# Search with pagination
|
42
|
+
YoutubeParser::Search::Channels.new(options).for_each_channel do |channel|
|
43
|
+
channel
|
44
|
+
end
|
35
45
|
|
36
|
-
|
46
|
+
# First page channels
|
47
|
+
YoutubeParser::Search::Channels.new(options).info
|
48
|
+
```
|
37
49
|
|
38
50
|
## Contributing
|
39
51
|
|
data/lib/youtube_parser.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'active_support/core_ext'
|
4
|
-
require 'uri'
|
5
3
|
require 'oj'
|
4
|
+
require 'uri'
|
6
5
|
require 'faraday'
|
7
6
|
require 'ostruct'
|
8
7
|
require 'yaml'
|
9
8
|
require 'pathname'
|
9
|
+
require 'active_support'
|
10
|
+
require 'active_support/core_ext'
|
10
11
|
|
11
12
|
require 'youtube_parser/version'
|
12
13
|
require 'youtube_parser/resource'
|
@@ -15,9 +16,11 @@ require 'youtube_parser/base_parser'
|
|
15
16
|
require 'youtube_parser/channel'
|
16
17
|
require 'youtube_parser/channels/about_section'
|
17
18
|
require 'youtube_parser/channels/videos_section'
|
19
|
+
require 'youtube_parser/search/channels'
|
18
20
|
|
19
21
|
module YoutubeParser
|
20
22
|
def self.root
|
21
|
-
|
23
|
+
path = Gem::Specification.find_by_name('youtube_parser').gem_dir
|
24
|
+
Pathname.new(path)
|
22
25
|
end
|
23
26
|
end
|
data/lib/youtube_parser/channel.rb
CHANGED
@@ -12,6 +12,7 @@ module YoutubeParser
|
|
12
12
|
|
13
13
|
def collect_channel_info
|
14
14
|
data = about_section_info.merge(video_section_info)
|
15
|
+
data[:channel_url] = channel_url
|
15
16
|
data.select! { |_, v| v.present? }
|
16
17
|
end
|
17
18
|
|
@@ -23,6 +24,15 @@ module YoutubeParser
|
|
23
24
|
@video_section_info ||= section(:videos).info
|
24
25
|
end
|
25
26
|
|
27
|
+
def channel_url
|
28
|
+
uri = URI(client.class::BASE_URL)
|
29
|
+
uri.path = options.channel_url
|
30
|
+
|
31
|
+
uri.to_s
|
32
|
+
rescue URI::InvalidComponentError
|
33
|
+
options.channel_url
|
34
|
+
end
|
35
|
+
|
26
36
|
def section(title)
|
27
37
|
constant_name = "YoutubeParser::Channels::#{title.capitalize}Section"
|
28
38
|
opts = { channel_url: options.channel_url, client: client }
|
data/lib/youtube_parser/channels/about_section.rb
CHANGED
@@ -41,11 +41,11 @@ module YoutubeParser
|
|
41
41
|
end
|
42
42
|
|
43
43
|
def tags
|
44
|
-
|
44
|
+
json.dig(*keys.keywords).to_s.split('"').select(&:present?)
|
45
45
|
end
|
46
46
|
|
47
47
|
def avatar_url
|
48
|
-
results =
|
48
|
+
results = json.dig(*keys.meta_avatar) || section.dig(*keys.avatar)
|
49
49
|
return if results.nil? || results.empty?
|
50
50
|
|
51
51
|
results.detect { |t| t&.dig('url') }&.dig('url')
|
@@ -59,7 +59,8 @@ module YoutubeParser
|
|
59
59
|
end
|
60
60
|
|
61
61
|
def followers_count
|
62
|
-
|
62
|
+
count = scrape_subscribers_count
|
63
|
+
count.present? ? count : statistics(:followers_count)
|
63
64
|
end
|
64
65
|
|
65
66
|
def videos_count
|
@@ -73,6 +74,29 @@ module YoutubeParser
|
|
73
74
|
stats[indexes[title.to_sym].to_i].to_s.gsub(/\D+/, '')
|
74
75
|
end
|
75
76
|
|
77
|
+
def scrape_subscribers_count
|
78
|
+
return unless subscribers_match
|
79
|
+
|
80
|
+
number = subscribers_match[:number].gsub(',', '.').to_f
|
81
|
+
return number.to_i unless subscribers_match[:units].present?
|
82
|
+
|
83
|
+
subscribers_with_units number
|
84
|
+
end
|
85
|
+
|
86
|
+
def subscribers_with_units(number)
|
87
|
+
(number * units[subscribers_match[:units].downcase.to_sym]).to_i
|
88
|
+
end
|
89
|
+
|
90
|
+
def subscribers_match
|
91
|
+
regex = /(?<number>[\d.,]+)(?<units>\w?)/
|
92
|
+
subscribers_text = json.dig(*keys.subscribers_count_text).to_s
|
93
|
+
subscribers_text.match(regex)
|
94
|
+
end
|
95
|
+
|
96
|
+
def units
|
97
|
+
{ m: 1_000_000, k: 1_000 }
|
98
|
+
end
|
99
|
+
|
76
100
|
def section
|
77
101
|
return @section if @section
|
78
102
|
|
data/lib/youtube_parser/channels/videos_section.rb
CHANGED
@@ -18,13 +18,16 @@ module YoutubeParser
|
|
18
18
|
return @video_ids if @video_ids&.any?
|
19
19
|
|
20
20
|
sections.each do |section|
|
21
|
-
videos = section.dig(*keys.video_section_tabs)
|
21
|
+
videos = section.dig(*keys.video_section_tabs) ||
|
22
|
+
section.dig(*keys.second_video_section)
|
22
23
|
next unless videos.is_a? Array
|
23
24
|
|
24
|
-
@video_ids = videos
|
25
|
+
@video_ids = scrape_video_ids videos
|
25
26
|
|
26
27
|
return @video_ids if @video_ids.any?
|
27
28
|
end
|
29
|
+
|
30
|
+
[]
|
28
31
|
end
|
29
32
|
|
30
33
|
def playlist_id
|
@@ -34,6 +37,10 @@ module YoutubeParser
|
|
34
37
|
@playlist_id = section&.dig(*keys.playlist_id)
|
35
38
|
end
|
36
39
|
|
40
|
+
def scrape_video_ids(videos)
|
41
|
+
videos.map { |video| video.dig(*keys.video_ids) }.compact
|
42
|
+
end
|
43
|
+
|
37
44
|
def sections
|
38
45
|
@sections ||= json.dig(*keys.section_tabs) || []
|
39
46
|
end
|
data/lib/youtube_parser/config/keys.yml
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
channel_url: ['navigationEndpoint', 'browseEndpoint', 'canonicalBaseUrl']
|
2
2
|
channel_title: ['title', 'simpleText']
|
3
3
|
subscribers_count: ['subscriberCountText', 'simpleText']
|
4
|
+
subscribers_count_text: ['header', 'c4TabbedHeaderRenderer',
|
5
|
+
'subscriberCountText']
|
4
6
|
section_tabs: ['contents', 'twoColumnBrowseResultsRenderer', 'tabs']
|
5
7
|
about_section_tab: ['tabRenderer', 'content', 'sectionListRenderer',
|
6
8
|
'contents', 0, 'itemSectionRenderer', 'contents', 0,
|
@@ -11,6 +13,9 @@ playlist_id: ['tabRenderer', 'content', 'sectionListRenderer', 'subMenu',
|
|
11
13
|
'channelSubMenuRenderer', 'playAllButton', 'buttonRenderer',
|
12
14
|
'navigationEndpoint', 'watchPlaylistEndpoint', 'playlistId']
|
13
15
|
video_ids: ['gridVideoRenderer', 'videoId']
|
16
|
+
second_video_section: ['tabRenderer', 'content', 'sectionListRenderer', 'contents',
|
17
|
+
0, 'itemSectionRenderer', 'contents', 0, 'shelfRenderer', 'content',
|
18
|
+
'horizontalListRenderer', 'items']
|
14
19
|
country: ['country', 'simpleText']
|
15
20
|
description_first: ['description', 'simpleText']
|
16
21
|
description_second: ['artistBio', 'simpleText']
|
@@ -21,4 +26,12 @@ views: ['viewCountText', 'runs']
|
|
21
26
|
keywords: ['metadata', 'channelMetadataRenderer', 'keywords']
|
22
27
|
tab_renderer: ['tabRenderer']
|
23
28
|
title: ['title', 'simpleText']
|
24
|
-
statistics: ['networkStatistics', 'statistics']
|
29
|
+
statistics: ['networkStatistics', 'statistics']
|
30
|
+
channels_page: ['contents', 'twoColumnSearchResultsRenderer', 'primaryContents',
|
31
|
+
'sectionListRenderer', 'contents']
|
32
|
+
continuation_contents: ['continuationContents', 'itemSectionContinuation']
|
33
|
+
channels_contents: ['itemSectionRenderer', 'contents']
|
34
|
+
channel_renderer: ['channelRenderer']
|
35
|
+
continuations: ['continuations']
|
36
|
+
renderer_continuations: ['itemSectionRenderer', 'continuations']
|
37
|
+
continuation: ['nextContinuationData', 'continuation']
|
data/lib/youtube_parser/search/channels.rb
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module YoutubeParser
|
4
|
+
module Search
|
5
|
+
class Channels < BaseParser
|
6
|
+
DEFAULT_PARAMS = { sp: 'EgIQAg%3D%3D' }.freeze
|
7
|
+
|
8
|
+
options :search_query
|
9
|
+
|
10
|
+
def info
|
11
|
+
(channels(search)[:channels] || []).map { |c| channel_info c }
|
12
|
+
end
|
13
|
+
|
14
|
+
def for_each_channel
|
15
|
+
for_each_page do |channels|
|
16
|
+
channels.each { |channel| yield channel_info channel }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def for_each_page
|
23
|
+
channels = channels(search)
|
24
|
+
return if channels.empty? || channels[:channels].empty?
|
25
|
+
|
26
|
+
loop do
|
27
|
+
yield channels[:channels]
|
28
|
+
|
29
|
+
break unless channels[:continuation]
|
30
|
+
|
31
|
+
channels = channels(search(ctoken: channels[:continuation]))
|
32
|
+
|
33
|
+
break if channels.empty?
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def search(opts = {})
|
38
|
+
opts = opts.merge(params)
|
39
|
+
client.get('results', opts)
|
40
|
+
end
|
41
|
+
|
42
|
+
def channels(json)
|
43
|
+
page_contents = json.dig(*keys.channels_page)
|
44
|
+
continuation_contents = json.dig(*keys.continuation_contents)
|
45
|
+
return {} if page_contents.nil? && continuation_contents.nil?
|
46
|
+
|
47
|
+
contents = (page_contents || [continuation_contents&.dig('contents')])
|
48
|
+
contents.each do |content|
|
49
|
+
channels = scrape_channels content
|
50
|
+
channels_hash = {
|
51
|
+
channels: channels,
|
52
|
+
continuation: continuation(continuation_contents, content)
|
53
|
+
}
|
54
|
+
|
55
|
+
return channels_hash if channels.any?
|
56
|
+
end
|
57
|
+
|
58
|
+
{}
|
59
|
+
end
|
60
|
+
|
61
|
+
def continuation(contents, content)
|
62
|
+
continuations = content.dig(*keys.renderer_continuations)
|
63
|
+
continuations = contents&.dig(*keys.continuations) if contents
|
64
|
+
continuations&.map do |cont|
|
65
|
+
cont.dig(*keys.continuation)
|
66
|
+
end&.compact&.first
|
67
|
+
end
|
68
|
+
|
69
|
+
def scrape_channels(content)
|
70
|
+
contents = content
|
71
|
+
contents = content.dig(*keys.channels_contents) if content.is_a? Hash
|
72
|
+
contents&.map { |c| c.dig(*keys.channel_renderer) }&.compact || []
|
73
|
+
end
|
74
|
+
|
75
|
+
def channel_info(channel)
|
76
|
+
channel_url = channel.dig(*keys.channel_url)
|
77
|
+
opts = { channel_url: channel_url, client: client }
|
78
|
+
|
79
|
+
YoutubeParser::Channel.new(opts).info
|
80
|
+
end
|
81
|
+
|
82
|
+
def params
|
83
|
+
options.attributes.slice(*required_options).merge(DEFAULT_PARAMS)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
data/youtube_parser.gemspec
CHANGED
@@ -27,4 +27,7 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
28
28
|
spec.add_development_dependency 'byebug'
|
29
29
|
spec.add_development_dependency 'webmock'
|
30
|
+
spec.add_dependency 'activesupport'
|
31
|
+
spec.add_dependency 'faraday'
|
32
|
+
spec.add_dependency 'oj'
|
30
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: youtube_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- o.vykhor
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-01-
|
11
|
+
date: 2020-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,6 +80,48 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activesupport
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: faraday
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: oj
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
83
125
|
description:
|
84
126
|
email:
|
85
127
|
- o.vykhor@ukr.net
|
@@ -107,6 +149,7 @@ files:
|
|
107
149
|
- lib/youtube_parser/client.rb
|
108
150
|
- lib/youtube_parser/config/keys.yml
|
109
151
|
- lib/youtube_parser/resource.rb
|
152
|
+
- lib/youtube_parser/search/channels.rb
|
110
153
|
- lib/youtube_parser/version.rb
|
111
154
|
- youtube_parser.gemspec
|
112
155
|
homepage: https://github.com/oleksiivykhor/youtube_parser
|