youtube_parser 0.1.0 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +11 -8
- data/README.md +21 -9
- data/lib/youtube_parser.rb +4 -2
- data/lib/youtube_parser/channel.rb +10 -0
- data/lib/youtube_parser/channels/about_section.rb +6 -0
- data/lib/youtube_parser/channels/videos_section.rb +9 -2
- data/lib/youtube_parser/client.rb +3 -0
- data/lib/youtube_parser/config/keys.yml +12 -1
- data/lib/youtube_parser/resource.rb +4 -0
- data/lib/youtube_parser/search/channels.rb +87 -0
- data/lib/youtube_parser/version.rb +1 -1
- data/youtube_parser.gemspec +3 -0
- metadata +45 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59f991625cce87ba5921dd80698a24afb1cdbc0bf07634e2dd7a5925ae36863a
|
4
|
+
data.tar.gz: b9396016cd0de944ba992818accbfe6ad14295ee6aa7800f5117b18789f55db7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 513b50c2132ce17ae7a11fc36eecc26b60d3b6ec268a1b85422cd7e13960068a1996a61e11a656c0a2e13dc8524ba1582065d434a9ffb804bc2a0af7701e9972
|
7
|
+
data.tar.gz: 5ab15103ef37ba712655de36396835a91247f4f0a4370da52540ca4d2b8b5e6cfb076346f210b30e6968efd885e197c521ccadd7247d01b283d405dc441fca85
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
youtube_parser (0.1.0)
|
4
|
+
youtube_parser (0.1.6)
|
5
|
+
activesupport
|
6
|
+
faraday
|
7
|
+
oj
|
5
8
|
|
6
9
|
GEM
|
7
10
|
remote: https://rubygems.org/
|
@@ -12,8 +15,8 @@ GEM
|
|
12
15
|
minitest (~> 5.1)
|
13
16
|
tzinfo (~> 1.1)
|
14
17
|
zeitwerk (~> 2.2)
|
15
|
-
addressable (2.
|
16
|
-
public_suffix (>= 2.0.2, <
|
18
|
+
addressable (2.7.0)
|
19
|
+
public_suffix (>= 2.0.2, < 5.0)
|
17
20
|
byebug (11.0.1)
|
18
21
|
concurrent-ruby (1.1.5)
|
19
22
|
crack (0.4.3)
|
@@ -21,13 +24,13 @@ GEM
|
|
21
24
|
diff-lcs (1.3)
|
22
25
|
faraday (0.17.1)
|
23
26
|
multipart-post (>= 1.2, < 3)
|
24
|
-
hashdiff (0.
|
27
|
+
hashdiff (1.0.0)
|
25
28
|
i18n (1.7.0)
|
26
29
|
concurrent-ruby (~> 1.0)
|
27
30
|
minitest (5.13.0)
|
28
31
|
multipart-post (2.1.1)
|
29
32
|
oj (3.9.2)
|
30
|
-
public_suffix (
|
33
|
+
public_suffix (4.0.2)
|
31
34
|
rake (10.5.0)
|
32
35
|
rspec (3.9.0)
|
33
36
|
rspec-core (~> 3.9.0)
|
@@ -42,14 +45,14 @@ GEM
|
|
42
45
|
diff-lcs (>= 1.2.0, < 2.0)
|
43
46
|
rspec-support (~> 3.9.0)
|
44
47
|
rspec-support (3.9.0)
|
45
|
-
safe_yaml (1.0.
|
48
|
+
safe_yaml (1.0.5)
|
46
49
|
thread_safe (0.3.6)
|
47
50
|
tzinfo (1.2.5)
|
48
51
|
thread_safe (~> 0.1)
|
49
|
-
webmock (3.
|
52
|
+
webmock (3.7.6)
|
50
53
|
addressable (>= 2.3.6)
|
51
54
|
crack (>= 0.3.2)
|
52
|
-
hashdiff
|
55
|
+
hashdiff (>= 0.4.0, < 2.0.0)
|
53
56
|
zeitwerk (2.2.2)
|
54
57
|
|
55
58
|
PLATFORMS
|
data/README.md
CHANGED
@@ -1,9 +1,4 @@
|
|
1
1
|
# YoutubeParser
|
2
|
-
|
3
|
-
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/youtube_parser`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
6
|
-
|
7
2
|
## Installation
|
8
3
|
|
9
4
|
Add this line to your application's Gemfile:
|
@@ -26,14 +21,31 @@ To fetch youtube channel info:
|
|
26
21
|
```ruby
|
27
22
|
require 'youtube_parser'
|
28
23
|
|
29
|
-
|
24
|
+
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 '\
|
25
|
+
'(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
|
26
|
+
options = { channel_url: 'https://www.youtube.com/channel/channel_id',
|
27
|
+
user_agent: user_agent }
|
28
|
+
|
29
|
+
YoutubeParser::Channel.new(options).info
|
30
30
|
```
|
31
31
|
|
32
|
-
|
32
|
+
To search channels by query:
|
33
|
+
```ruby
|
34
|
+
require 'youtube_parser'
|
33
35
|
|
34
|
-
|
36
|
+
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 '\
|
37
|
+
'(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
|
38
|
+
options = { search_query: 'search query',
|
39
|
+
user_agent: user_agent }
|
35
40
|
|
36
|
-
|
41
|
+
# Search with pagination
|
42
|
+
YoutubeParser::Search::Channels.new(options).for_each_channel do |channel|
|
43
|
+
channel
|
44
|
+
end
|
45
|
+
|
46
|
+
# First page channels
|
47
|
+
YoutubeParser::Search::Channels.new(options).info
|
48
|
+
```
|
37
49
|
|
38
50
|
## Contributing
|
39
51
|
|
data/lib/youtube_parser.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'active_support/core_ext'
|
4
|
-
require 'uri'
|
5
3
|
require 'oj'
|
4
|
+
require 'uri'
|
6
5
|
require 'faraday'
|
7
6
|
require 'ostruct'
|
8
7
|
require 'yaml'
|
9
8
|
require 'pathname'
|
9
|
+
require 'active_support'
|
10
|
+
require 'active_support/core_ext'
|
10
11
|
|
11
12
|
require 'youtube_parser/version'
|
12
13
|
require 'youtube_parser/resource'
|
@@ -15,6 +16,7 @@ require 'youtube_parser/base_parser'
|
|
15
16
|
require 'youtube_parser/channel'
|
16
17
|
require 'youtube_parser/channels/about_section'
|
17
18
|
require 'youtube_parser/channels/videos_section'
|
19
|
+
require 'youtube_parser/search/channels'
|
18
20
|
|
19
21
|
module YoutubeParser
|
20
22
|
def self.root
|
@@ -12,6 +12,7 @@ module YoutubeParser
|
|
12
12
|
|
13
13
|
def collect_channel_info
|
14
14
|
data = about_section_info.merge(video_section_info)
|
15
|
+
data[:channel_url] = channel_url
|
15
16
|
data.select! { |_, v| v.present? }
|
16
17
|
end
|
17
18
|
|
@@ -23,6 +24,15 @@ module YoutubeParser
|
|
23
24
|
@video_section_info ||= section(:videos).info
|
24
25
|
end
|
25
26
|
|
27
|
+
def channel_url
|
28
|
+
uri = URI(client.class::BASE_URL)
|
29
|
+
uri.path = options.channel_url
|
30
|
+
|
31
|
+
uri.to_s
|
32
|
+
rescue URI::InvalidComponentError
|
33
|
+
options.channel_url
|
34
|
+
end
|
35
|
+
|
26
36
|
def section(title)
|
27
37
|
constant_name = "YoutubeParser::Channels::#{title.capitalize}Section"
|
28
38
|
opts = { channel_url: options.channel_url, client: client }
|
@@ -8,6 +8,7 @@ module YoutubeParser
|
|
8
8
|
def info
|
9
9
|
{
|
10
10
|
title: title,
|
11
|
+
email: email,
|
11
12
|
description: description,
|
12
13
|
country: country,
|
13
14
|
tags: tags,
|
@@ -24,6 +25,11 @@ module YoutubeParser
|
|
24
25
|
section.dig(*keys.title)
|
25
26
|
end
|
26
27
|
|
28
|
+
def email
|
29
|
+
regex = /#{URI::MailTo::EMAIL_REGEXP.source.gsub(/\\A|\\z/, '')}/
|
30
|
+
description[regex]
|
31
|
+
end
|
32
|
+
|
27
33
|
def description
|
28
34
|
descriptions = [section.dig(*keys.description_first),
|
29
35
|
section.dig(*keys.description_second)]
|
@@ -18,13 +18,16 @@ module YoutubeParser
|
|
18
18
|
return @video_ids if @video_ids&.any?
|
19
19
|
|
20
20
|
sections.each do |section|
|
21
|
-
videos = section.dig(*keys.video_section_tabs)
|
21
|
+
videos = section.dig(*keys.video_section_tabs) ||
|
22
|
+
section.dig(*keys.second_video_section)
|
22
23
|
next unless videos.is_a? Array
|
23
24
|
|
24
|
-
@video_ids = videos
|
25
|
+
@video_ids = scrape_video_ids videos
|
25
26
|
|
26
27
|
return @video_ids if @video_ids.any?
|
27
28
|
end
|
29
|
+
|
30
|
+
[]
|
28
31
|
end
|
29
32
|
|
30
33
|
def playlist_id
|
@@ -34,6 +37,10 @@ module YoutubeParser
|
|
34
37
|
@playlist_id = section&.dig(*keys.playlist_id)
|
35
38
|
end
|
36
39
|
|
40
|
+
def scrape_video_ids(videos)
|
41
|
+
videos.map { |video| video.dig(*keys.video_ids) }.compact
|
42
|
+
end
|
43
|
+
|
37
44
|
def sections
|
38
45
|
@sections ||= json.dig(*keys.section_tabs) || []
|
39
46
|
end
|
@@ -13,6 +13,8 @@ module YoutubeParser
|
|
13
13
|
|
14
14
|
def get(endpoint, options = {})
|
15
15
|
response = client.get(endpoint, options)
|
16
|
+
return {} unless response.status.eql?(200)
|
17
|
+
|
16
18
|
json_str = response.body[DATA_REGEX, 1]
|
17
19
|
|
18
20
|
get_json json_str
|
@@ -24,6 +26,7 @@ module YoutubeParser
|
|
24
26
|
opts = { ssl: { verify: false }, request: { timeout: 10 } }
|
25
27
|
@client ||= Faraday.new(BASE_URL, opts) do |request|
|
26
28
|
request.adapter Faraday.default_adapter
|
29
|
+
request.headers['Accept-Language'] = 'en-US'
|
27
30
|
request.headers['User-Agent'] = options.user_agent if options.user_agent
|
28
31
|
request.proxy = proxy if options.proxy
|
29
32
|
end
|
@@ -11,6 +11,9 @@ playlist_id: ['tabRenderer', 'content', 'sectionListRenderer', 'subMenu',
|
|
11
11
|
'channelSubMenuRenderer', 'playAllButton', 'buttonRenderer',
|
12
12
|
'navigationEndpoint', 'watchPlaylistEndpoint', 'playlistId']
|
13
13
|
video_ids: ['gridVideoRenderer', 'videoId']
|
14
|
+
second_video_section: ['tabRenderer', 'content', 'sectionListRenderer', 'contents',
|
15
|
+
0, 'itemSectionRenderer', 'contents', 0, 'shelfRenderer', 'content',
|
16
|
+
'horizontalListRenderer', 'items']
|
14
17
|
country: ['country', 'simpleText']
|
15
18
|
description_first: ['description', 'simpleText']
|
16
19
|
description_second: ['artistBio', 'simpleText']
|
@@ -21,4 +24,12 @@ views: ['viewCountText', 'runs']
|
|
21
24
|
keywords: ['metadata', 'channelMetadataRenderer', 'keywords']
|
22
25
|
tab_renderer: ['tabRenderer']
|
23
26
|
title: ['title', 'simpleText']
|
24
|
-
statistics: ['networkStatistics', 'statistics']
|
27
|
+
statistics: ['networkStatistics', 'statistics']
|
28
|
+
channels_page: ['contents', 'twoColumnSearchResultsRenderer', 'primaryContents',
|
29
|
+
'sectionListRenderer', 'contents']
|
30
|
+
continuation_contents: ['continuationContents', 'itemSectionContinuation']
|
31
|
+
channels_contents: ['itemSectionRenderer', 'contents']
|
32
|
+
channel_renderer: ['channelRenderer']
|
33
|
+
continuations: ['continuations']
|
34
|
+
renderer_continuations: ['itemSectionRenderer', 'continuations']
|
35
|
+
continuation: ['nextContinuationData', 'continuation']
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module YoutubeParser
|
4
|
+
module Search
|
5
|
+
class Channels < BaseParser
|
6
|
+
DEFAULT_PARAMS = { sp: 'EgIQAg%3D%3D' }.freeze
|
7
|
+
|
8
|
+
options :search_query
|
9
|
+
|
10
|
+
def info
|
11
|
+
(channels(search)[:channels] || []).map { |c| channel_info c }
|
12
|
+
end
|
13
|
+
|
14
|
+
def for_each_channel
|
15
|
+
for_each_page do |channels|
|
16
|
+
channels.each { |channel| yield channel_info channel }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def for_each_page
|
23
|
+
channels = channels(search)
|
24
|
+
return if channels.empty? || channels[:channels].empty?
|
25
|
+
|
26
|
+
loop do
|
27
|
+
yield channels[:channels]
|
28
|
+
|
29
|
+
break unless channels[:continuation]
|
30
|
+
|
31
|
+
channels = channels(search(ctoken: channels[:continuation]))
|
32
|
+
|
33
|
+
break if channels.empty?
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def search(opts = {})
|
38
|
+
opts = opts.merge(params)
|
39
|
+
client.get('results', opts)
|
40
|
+
end
|
41
|
+
|
42
|
+
def channels(json)
|
43
|
+
page_contents = json.dig(*keys.channels_page)
|
44
|
+
continuation_contents = json.dig(*keys.continuation_contents)
|
45
|
+
return {} if page_contents.nil? && continuation_contents.nil?
|
46
|
+
|
47
|
+
contents = (page_contents || [continuation_contents&.dig('contents')])
|
48
|
+
contents.each do |content|
|
49
|
+
channels = scrape_channels content
|
50
|
+
channels_hash = {
|
51
|
+
channels: channels,
|
52
|
+
continuation: continuation(continuation_contents, content)
|
53
|
+
}
|
54
|
+
|
55
|
+
return channels_hash if channels.any?
|
56
|
+
end
|
57
|
+
|
58
|
+
{}
|
59
|
+
end
|
60
|
+
|
61
|
+
def continuation(contents, content)
|
62
|
+
continuations = content.dig(*keys.renderer_continuations)
|
63
|
+
continuations = contents&.dig(*keys.continuations) if contents
|
64
|
+
continuations&.map do |cont|
|
65
|
+
cont.dig(*keys.continuation)
|
66
|
+
end&.compact&.first
|
67
|
+
end
|
68
|
+
|
69
|
+
def scrape_channels(content)
|
70
|
+
contents = content
|
71
|
+
contents = content.dig(*keys.channels_contents) if content.is_a? Hash
|
72
|
+
contents&.map { |c| c.dig(*keys.channel_renderer) }&.compact || []
|
73
|
+
end
|
74
|
+
|
75
|
+
def channel_info(channel)
|
76
|
+
channel_url = channel.dig(*keys.channel_url)
|
77
|
+
opts = { channel_url: channel_url, client: client }
|
78
|
+
|
79
|
+
YoutubeParser::Channel.new(opts).info
|
80
|
+
end
|
81
|
+
|
82
|
+
def params
|
83
|
+
options.attributes.slice(*required_options).merge(DEFAULT_PARAMS)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
data/youtube_parser.gemspec
CHANGED
@@ -27,4 +27,7 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
28
28
|
spec.add_development_dependency 'byebug'
|
29
29
|
spec.add_development_dependency 'webmock'
|
30
|
+
spec.add_dependency 'activesupport'
|
31
|
+
spec.add_dependency 'faraday'
|
32
|
+
spec.add_dependency 'oj'
|
30
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: youtube_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- o.vykhor
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-01-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,6 +80,48 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activesupport
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: faraday
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: oj
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
83
125
|
description:
|
84
126
|
email:
|
85
127
|
- o.vykhor@ukr.net
|
@@ -107,6 +149,7 @@ files:
|
|
107
149
|
- lib/youtube_parser/client.rb
|
108
150
|
- lib/youtube_parser/config/keys.yml
|
109
151
|
- lib/youtube_parser/resource.rb
|
152
|
+
- lib/youtube_parser/search/channels.rb
|
110
153
|
- lib/youtube_parser/version.rb
|
111
154
|
- youtube_parser.gemspec
|
112
155
|
homepage: https://github.com/oleksiivykhor/youtube_parser
|