youtube_parser 0.1.3 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +4 -1
- data/README.md +13 -1
- data/lib/youtube_parser.rb +6 -3
- data/lib/youtube_parser/channels/about_section.rb +27 -3
- data/lib/youtube_parser/channels/channels_section.rb +33 -0
- data/lib/youtube_parser/config/keys.yml +7 -1
- data/lib/youtube_parser/version.rb +1 -1
- data/youtube_parser.gemspec +3 -0
- metadata +45 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f79ddcdfbe0953a3571e1e27abdec5ea0993d5f0426f5d7393a518966bdd7b9b
|
4
|
+
data.tar.gz: 6ab6fe4c91a6affb19ab86dd07508fb8f230380cd62306012bbcd95e37f55537
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b82d6e48e9c73fb02129aee92874becfe3203269ee47942ed92814c5db3f54ffb29c20613916d6e631baf2def4a592ae5a4b34baea200a14d2253ab141bcded
|
7
|
+
data.tar.gz: f676707c34aa5d7c1b7347c1bce676dd117a72eb7cd278de05ecf00953202b31387b4b5b6d04e27287d62a416ace34e7ca65cc378396fa93e792755477280473
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -39,7 +39,7 @@ options = { search_query: 'search query',
|
|
39
39
|
user_agent: user_agent }
|
40
40
|
|
41
41
|
# Search with pagination
|
42
|
-
YoutubeParser::Search::Channels.new(options).
|
42
|
+
YoutubeParser::Search::Channels.new(options).for_each_channel do |channel|
|
43
43
|
channel
|
44
44
|
end
|
45
45
|
|
@@ -47,6 +47,18 @@ end
|
|
47
47
|
YoutubeParser::Search::Channels.new(options).info
|
48
48
|
```
|
49
49
|
|
50
|
+
To parse a channel's related channels:
|
51
|
+
```ruby
|
52
|
+
require 'youtube_parser'
|
53
|
+
|
54
|
+
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 '\
|
55
|
+
'(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
|
56
|
+
options = { channel_url: 'https://www.youtube.com/channel/CHANNEL_ID',
|
57
|
+
user_agent: user_agent }
|
58
|
+
|
59
|
+
YoutubeParser::Channels::ChannelsSection.new(options).info
|
60
|
+
```
|
61
|
+
|
50
62
|
## Contributing
|
51
63
|
|
52
64
|
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/youtube_parser. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
data/lib/youtube_parser.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'active_support/core_ext'
|
4
|
-
require 'uri'
|
5
3
|
require 'oj'
|
4
|
+
require 'uri'
|
6
5
|
require 'faraday'
|
7
6
|
require 'ostruct'
|
8
7
|
require 'yaml'
|
9
8
|
require 'pathname'
|
9
|
+
require 'active_support'
|
10
|
+
require 'active_support/core_ext'
|
10
11
|
|
11
12
|
require 'youtube_parser/version'
|
12
13
|
require 'youtube_parser/resource'
|
@@ -15,10 +16,12 @@ require 'youtube_parser/base_parser'
|
|
15
16
|
require 'youtube_parser/channel'
|
16
17
|
require 'youtube_parser/channels/about_section'
|
17
18
|
require 'youtube_parser/channels/videos_section'
|
19
|
+
require 'youtube_parser/channels/channels_section'
|
18
20
|
require 'youtube_parser/search/channels'
|
19
21
|
|
20
22
|
module YoutubeParser
|
21
23
|
def self.root
|
22
|
-
|
24
|
+
path = Gem::Specification.find_by_name('youtube_parser').gem_dir
|
25
|
+
Pathname.new(path)
|
23
26
|
end
|
24
27
|
end
|
@@ -41,11 +41,11 @@ module YoutubeParser
|
|
41
41
|
end
|
42
42
|
|
43
43
|
def tags
|
44
|
-
|
44
|
+
json.dig(*keys.keywords).to_s.split('"').select(&:present?)
|
45
45
|
end
|
46
46
|
|
47
47
|
def avatar_url
|
48
|
-
results =
|
48
|
+
results = json.dig(*keys.meta_avatar) || section.dig(*keys.avatar)
|
49
49
|
return if results.nil? || results.empty?
|
50
50
|
|
51
51
|
results.detect { |t| t&.dig('url') }&.dig('url')
|
@@ -59,7 +59,8 @@ module YoutubeParser
|
|
59
59
|
end
|
60
60
|
|
61
61
|
def followers_count
|
62
|
-
|
62
|
+
count = scrape_subscribers_count
|
63
|
+
count.present? ? count : statistics(:followers_count)
|
63
64
|
end
|
64
65
|
|
65
66
|
def videos_count
|
@@ -73,6 +74,29 @@ module YoutubeParser
|
|
73
74
|
stats[indexes[title.to_sym].to_i].to_s.gsub(/\D+/, '')
|
74
75
|
end
|
75
76
|
|
77
|
+
def scrape_subscribers_count
|
78
|
+
return unless subscribers_match
|
79
|
+
|
80
|
+
number = subscribers_match[:number].gsub(',', '.').to_f
|
81
|
+
return number.to_i unless subscribers_match[:units].present?
|
82
|
+
|
83
|
+
subscribers_with_units number
|
84
|
+
end
|
85
|
+
|
86
|
+
def subscribers_with_units(number)
|
87
|
+
(number * units[subscribers_match[:units].downcase.to_sym]).to_i
|
88
|
+
end
|
89
|
+
|
90
|
+
def subscribers_match
|
91
|
+
regex = /(?<number>[\d.,]+)(?<units>\w?)/
|
92
|
+
subscribers_text = json.dig(*keys.subscribers_count_text).to_s
|
93
|
+
subscribers_text.match(regex)
|
94
|
+
end
|
95
|
+
|
96
|
+
def units
|
97
|
+
{ m: 1_000_000, k: 1_000 }
|
98
|
+
end
|
99
|
+
|
76
100
|
def section
|
77
101
|
return @section if @section
|
78
102
|
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module YoutubeParser
|
4
|
+
module Channels
|
5
|
+
class ChannelsSection < BaseParser
|
6
|
+
options :channel_url
|
7
|
+
|
8
|
+
def info
|
9
|
+
@info ||= channel_urls.map do |url|
|
10
|
+
channel_url = "#{client.class::BASE_URL}#{url}"
|
11
|
+
opts = { channel_url: channel_url, client: client }
|
12
|
+
|
13
|
+
YoutubeParser::Channel.new(opts).info
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
def channel_urls
|
20
|
+
channels.map { |c| c.dig(*keys.channels_section_urls) }.compact
|
21
|
+
end
|
22
|
+
|
23
|
+
def channels
|
24
|
+
tabs = json.dig(*keys.section_tabs) || []
|
25
|
+
tabs.map { |t| t.dig(*keys.channels_section) }.compact.first || []
|
26
|
+
end
|
27
|
+
|
28
|
+
def json
|
29
|
+
@json ||= client.get("#{options.channel_url}/channels")
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
channel_url: ['navigationEndpoint', 'browseEndpoint', 'canonicalBaseUrl']
|
2
2
|
channel_title: ['title', 'simpleText']
|
3
3
|
subscribers_count: ['subscriberCountText', 'simpleText']
|
4
|
+
subscribers_count_text: ['header', 'c4TabbedHeaderRenderer',
|
5
|
+
'subscriberCountText']
|
4
6
|
section_tabs: ['contents', 'twoColumnBrowseResultsRenderer', 'tabs']
|
5
7
|
about_section_tab: ['tabRenderer', 'content', 'sectionListRenderer',
|
6
8
|
'contents', 0, 'itemSectionRenderer', 'contents', 0,
|
@@ -32,4 +34,8 @@ channels_contents: ['itemSectionRenderer', 'contents']
|
|
32
34
|
channel_renderer: ['channelRenderer']
|
33
35
|
continuations: ['continuations']
|
34
36
|
renderer_continuations: ['itemSectionRenderer', 'continuations']
|
35
|
-
continuation: ['nextContinuationData', 'continuation']
|
37
|
+
continuation: ['nextContinuationData', 'continuation']
|
38
|
+
channels_section: ['tabRenderer', 'content', 'sectionListRenderer', 'contents',
|
39
|
+
0, 'itemSectionRenderer', 'contents', 0, 'gridRenderer', 'items']
|
40
|
+
channels_section_urls: ['gridChannelRenderer', 'navigationEndpoint',
|
41
|
+
'commandMetadata', 'webCommandMetadata', 'url']
|
data/youtube_parser.gemspec
CHANGED
@@ -27,4 +27,7 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
28
28
|
spec.add_development_dependency 'byebug'
|
29
29
|
spec.add_development_dependency 'webmock'
|
30
|
+
spec.add_dependency 'activesupport'
|
31
|
+
spec.add_dependency 'faraday'
|
32
|
+
spec.add_dependency 'oj'
|
30
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: youtube_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- o.vykhor
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-01-
|
11
|
+
date: 2020-01-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,6 +80,48 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activesupport
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: faraday
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: oj
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
83
125
|
description:
|
84
126
|
email:
|
85
127
|
- o.vykhor@ukr.net
|
@@ -103,6 +145,7 @@ files:
|
|
103
145
|
- lib/youtube_parser/base_parser.rb
|
104
146
|
- lib/youtube_parser/channel.rb
|
105
147
|
- lib/youtube_parser/channels/about_section.rb
|
148
|
+
- lib/youtube_parser/channels/channels_section.rb
|
106
149
|
- lib/youtube_parser/channels/videos_section.rb
|
107
150
|
- lib/youtube_parser/client.rb
|
108
151
|
- lib/youtube_parser/config/keys.yml
|