youtube_parser 0.1.2 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 37de03ca4cf583c72ed032b32f6a69afa8e67562a85cadd753a73e8cff2b3754
4
- data.tar.gz: 4c2dc703e0cf7c954bf48b2e3d5413ed15b693a9282517b7381de11f22092e52
3
+ metadata.gz: 6e7896f15e739e7d88e6de6acf67f07bc543897deffebe0c4e723a3746d84e32
4
+ data.tar.gz: bff5544c0e250bd76c9265f74eaee6d743423c7748030415d0983175b3c263ab
5
5
  SHA512:
6
- metadata.gz: c0c46db11b78fdc4ac1346ea44e67fbdc05ab2417b536e1e588177c3f3d5b4e30fdf57cf523b32b554960b6d966ec8addec3d2e0dd97b35c12dcbc9b855d0a68
7
- data.tar.gz: 304fb644a213c32686c5ce13162a0a5c8dd8c510cefb9d9a331bde8dd1725cdf9ac5535d9db1f0fa8d7c2dd4ad22c63c83c40bdae8c053be1dee5a280fdb3d45
6
+ metadata.gz: 7b6ae178f21036ff1f620982349975cd88973aa51443a59caf8efa81a0da5a4b1bbd135f00c383f0e3c672a1edd63c74d12219c346f364066bbc952f8f59a525
7
+ data.tar.gz: 2b30490336766753b5b18aa7f0bdc5f61eea7ac6dd13c97777466c2ec5f2f25c8df3bde616152243811882b8c2c656625a50c68989cb80f51c41ff1a2939428e
data/Gemfile CHANGED
@@ -5,4 +5,4 @@ gemspec
5
5
 
6
6
  gem 'faraday'
7
7
  gem 'oj'
8
- gem 'activesupport'
8
+ gem 'activesupport'
@@ -1,7 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- youtube_parser (0.1.2)
4
+ youtube_parser (0.1.8)
5
+ activesupport
6
+ faraday
7
+ oj
5
8
 
6
9
  GEM
7
10
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -29,11 +29,23 @@ options = { channel_url: 'https://www.youtube.com/channel/channel_id',
29
29
  YoutubeParser::Channel.new(options).info
30
30
  ```
31
31
 
32
- ## Development
32
+ To search channels by query:
33
+ ```ruby
34
+ require 'youtube_parser'
33
35
 
34
- After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
36
+ user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 '\
37
+ '(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
38
+ options = { search_query: 'search query',
39
+ user_agent: user_agent }
40
+
41
+ # Search with pagination
42
+ YoutubeParser::Search::Channels.new(options).for_each_channel do |channel|
43
+ channel
44
+ end
35
45
 
36
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
46
+ # First page channels
47
+ YoutubeParser::Search::Channels.new(options).info
48
+ ```
37
49
 
38
50
  ## Contributing
39
51
 
@@ -1,12 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'active_support/core_ext'
4
- require 'uri'
5
3
  require 'oj'
4
+ require 'uri'
6
5
  require 'faraday'
7
6
  require 'ostruct'
8
7
  require 'yaml'
9
8
  require 'pathname'
9
+ require 'active_support'
10
+ require 'active_support/core_ext'
10
11
 
11
12
  require 'youtube_parser/version'
12
13
  require 'youtube_parser/resource'
@@ -15,9 +16,11 @@ require 'youtube_parser/base_parser'
15
16
  require 'youtube_parser/channel'
16
17
  require 'youtube_parser/channels/about_section'
17
18
  require 'youtube_parser/channels/videos_section'
19
+ require 'youtube_parser/search/channels'
18
20
 
19
21
  module YoutubeParser
20
22
  def self.root
21
- Pathname.new(Dir.pwd)
23
+ path = Gem::Specification.find_by_name('youtube_parser').gem_dir
24
+ Pathname.new(path)
22
25
  end
23
26
  end
@@ -12,6 +12,7 @@ module YoutubeParser
12
12
 
13
13
  def collect_channel_info
14
14
  data = about_section_info.merge(video_section_info)
15
+ data[:channel_url] = channel_url
15
16
  data.select! { |_, v| v.present? }
16
17
  end
17
18
 
@@ -23,6 +24,15 @@ module YoutubeParser
23
24
  @video_section_info ||= section(:videos).info
24
25
  end
25
26
 
27
+ def channel_url
28
+ uri = URI(client.class::BASE_URL)
29
+ uri.path = options.channel_url
30
+
31
+ uri.to_s
32
+ rescue URI::InvalidComponentError
33
+ options.channel_url
34
+ end
35
+
26
36
  def section(title)
27
37
  constant_name = "YoutubeParser::Channels::#{title.capitalize}Section"
28
38
  opts = { channel_url: options.channel_url, client: client }
@@ -41,11 +41,11 @@ module YoutubeParser
41
41
  end
42
42
 
43
43
  def tags
44
- section.dig(*keys.keywords)
44
+ json.dig(*keys.keywords).to_s.split('"').select(&:present?)
45
45
  end
46
46
 
47
47
  def avatar_url
48
- results = section.dig(*keys.avatar) || section.dig(*keys.meta_avatar)
48
+ results = json.dig(*keys.meta_avatar) || section.dig(*keys.avatar)
49
49
  return if results.nil? || results.empty?
50
50
 
51
51
  results.detect { |t| t&.dig('url') }&.dig('url')
@@ -59,7 +59,8 @@ module YoutubeParser
59
59
  end
60
60
 
61
61
  def followers_count
62
- statistics :followers_count
62
+ count = scrape_subscribers_count
63
+ count.present? ? count : statistics(:followers_count)
63
64
  end
64
65
 
65
66
  def videos_count
@@ -73,6 +74,29 @@ module YoutubeParser
73
74
  stats[indexes[title.to_sym].to_i].to_s.gsub(/\D+/, '')
74
75
  end
75
76
 
77
+ def scrape_subscribers_count
78
+ return unless subscribers_match
79
+
80
+ number = subscribers_match[:number].gsub(',', '.').to_f
81
+ return number.to_i unless subscribers_match[:units].present?
82
+
83
+ subscribers_with_units number
84
+ end
85
+
86
+ def subscribers_with_units(number)
87
+ (number * units[subscribers_match[:units].downcase.to_sym]).to_i
88
+ end
89
+
90
+ def subscribers_match
91
+ regex = /(?<number>[\d.,]+)(?<units>\w?)/
92
+ subscribers_text = json.dig(*keys.subscribers_count_text).to_s
93
+ subscribers_text.match(regex)
94
+ end
95
+
96
+ def units
97
+ { m: 1_000_000, k: 1_000 }
98
+ end
99
+
76
100
  def section
77
101
  return @section if @section
78
102
 
@@ -18,13 +18,16 @@ module YoutubeParser
18
18
  return @video_ids if @video_ids&.any?
19
19
 
20
20
  sections.each do |section|
21
- videos = section.dig(*keys.video_section_tabs)
21
+ videos = section.dig(*keys.video_section_tabs) ||
22
+ section.dig(*keys.second_video_section)
22
23
  next unless videos.is_a? Array
23
24
 
24
- @video_ids = videos.map { |v| v.dig(*keys.video_ids) }.compact
25
+ @video_ids = scrape_video_ids videos
25
26
 
26
27
  return @video_ids if @video_ids.any?
27
28
  end
29
+
30
+ []
28
31
  end
29
32
 
30
33
  def playlist_id
@@ -34,6 +37,10 @@ module YoutubeParser
34
37
  @playlist_id = section&.dig(*keys.playlist_id)
35
38
  end
36
39
 
40
+ def scrape_video_ids(videos)
41
+ videos.map { |video| video.dig(*keys.video_ids) }.compact
42
+ end
43
+
37
44
  def sections
38
45
  @sections ||= json.dig(*keys.section_tabs) || []
39
46
  end
@@ -13,6 +13,8 @@ module YoutubeParser
13
13
 
14
14
  def get(endpoint, options = {})
15
15
  response = client.get(endpoint, options)
16
+ return {} unless response.status.eql?(200)
17
+
16
18
  json_str = response.body[DATA_REGEX, 1]
17
19
 
18
20
  get_json json_str
@@ -1,6 +1,8 @@
1
1
  channel_url: ['navigationEndpoint', 'browseEndpoint', 'canonicalBaseUrl']
2
2
  channel_title: ['title', 'simpleText']
3
3
  subscribers_count: ['subscriberCountText', 'simpleText']
4
+ subscribers_count_text: ['header', 'c4TabbedHeaderRenderer',
5
+ 'subscriberCountText']
4
6
  section_tabs: ['contents', 'twoColumnBrowseResultsRenderer', 'tabs']
5
7
  about_section_tab: ['tabRenderer', 'content', 'sectionListRenderer',
6
8
  'contents', 0, 'itemSectionRenderer', 'contents', 0,
@@ -11,6 +13,9 @@ playlist_id: ['tabRenderer', 'content', 'sectionListRenderer', 'subMenu',
11
13
  'channelSubMenuRenderer', 'playAllButton', 'buttonRenderer',
12
14
  'navigationEndpoint', 'watchPlaylistEndpoint', 'playlistId']
13
15
  video_ids: ['gridVideoRenderer', 'videoId']
16
+ second_video_section: ['tabRenderer', 'content', 'sectionListRenderer', 'contents',
17
+ 0, 'itemSectionRenderer', 'contents', 0, 'shelfRenderer', 'content',
18
+ 'horizontalListRenderer', 'items']
14
19
  country: ['country', 'simpleText']
15
20
  description_first: ['description', 'simpleText']
16
21
  description_second: ['artistBio', 'simpleText']
@@ -21,4 +26,12 @@ views: ['viewCountText', 'runs']
21
26
  keywords: ['metadata', 'channelMetadataRenderer', 'keywords']
22
27
  tab_renderer: ['tabRenderer']
23
28
  title: ['title', 'simpleText']
24
- statistics: ['networkStatistics', 'statistics']
29
+ statistics: ['networkStatistics', 'statistics']
30
+ channels_page: ['contents', 'twoColumnSearchResultsRenderer', 'primaryContents',
31
+ 'sectionListRenderer', 'contents']
32
+ continuation_contents: ['continuationContents', 'itemSectionContinuation']
33
+ channels_contents: ['itemSectionRenderer', 'contents']
34
+ channel_renderer: ['channelRenderer']
35
+ continuations: ['continuations']
36
+ renderer_continuations: ['itemSectionRenderer', 'continuations']
37
+ continuation: ['nextContinuationData', 'continuation']
@@ -13,6 +13,10 @@ module YoutubeParser
13
13
  @attributes ||= @hash.transform_keys(&:to_sym)
14
14
  end
15
15
 
16
+ def keys
17
+ @keys ||= attributes.keys
18
+ end
19
+
16
20
  def method_missing(method, *args, &block)
17
21
  super_method = super
18
22
  return super_method if super_method
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ module YoutubeParser
4
+ module Search
5
+ class Channels < BaseParser
6
+ DEFAULT_PARAMS = { sp: 'EgIQAg%3D%3D' }.freeze
7
+
8
+ options :search_query
9
+
10
+ def info
11
+ (channels(search)[:channels] || []).map { |c| channel_info c }
12
+ end
13
+
14
+ def for_each_channel
15
+ for_each_page do |channels|
16
+ channels.each { |channel| yield channel_info channel }
17
+ end
18
+ end
19
+
20
+ private
21
+
22
+ def for_each_page
23
+ channels = channels(search)
24
+ return if channels.empty? || channels[:channels].empty?
25
+
26
+ loop do
27
+ yield channels[:channels]
28
+
29
+ break unless channels[:continuation]
30
+
31
+ channels = channels(search(ctoken: channels[:continuation]))
32
+
33
+ break if channels.empty?
34
+ end
35
+ end
36
+
37
+ def search(opts = {})
38
+ opts = opts.merge(params)
39
+ client.get('results', opts)
40
+ end
41
+
42
+ def channels(json)
43
+ page_contents = json.dig(*keys.channels_page)
44
+ continuation_contents = json.dig(*keys.continuation_contents)
45
+ return {} if page_contents.nil? && continuation_contents.nil?
46
+
47
+ contents = (page_contents || [continuation_contents&.dig('contents')])
48
+ contents.each do |content|
49
+ channels = scrape_channels content
50
+ channels_hash = {
51
+ channels: channels,
52
+ continuation: continuation(continuation_contents, content)
53
+ }
54
+
55
+ return channels_hash if channels.any?
56
+ end
57
+
58
+ {}
59
+ end
60
+
61
+ def continuation(contents, content)
62
+ continuations = content.dig(*keys.renderer_continuations)
63
+ continuations = contents&.dig(*keys.continuations) if contents
64
+ continuations&.map do |cont|
65
+ cont.dig(*keys.continuation)
66
+ end&.compact&.first
67
+ end
68
+
69
+ def scrape_channels(content)
70
+ contents = content
71
+ contents = content.dig(*keys.channels_contents) if content.is_a? Hash
72
+ contents&.map { |c| c.dig(*keys.channel_renderer) }&.compact || []
73
+ end
74
+
75
+ def channel_info(channel)
76
+ channel_url = channel.dig(*keys.channel_url)
77
+ opts = { channel_url: channel_url, client: client }
78
+
79
+ YoutubeParser::Channel.new(opts).info
80
+ end
81
+
82
+ def params
83
+ options.attributes.slice(*required_options).merge(DEFAULT_PARAMS)
84
+ end
85
+ end
86
+ end
87
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module YoutubeParser
4
- VERSION = '0.1.2'
4
+ VERSION = '0.1.8'
5
5
  end
@@ -27,4 +27,7 @@ Gem::Specification.new do |spec|
27
27
  spec.add_development_dependency 'rspec', '~> 3.0'
28
28
  spec.add_development_dependency 'byebug'
29
29
  spec.add_development_dependency 'webmock'
30
+ spec.add_dependency 'activesupport'
31
+ spec.add_dependency 'faraday'
32
+ spec.add_dependency 'oj'
30
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: youtube_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - o.vykhor
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-01-01 00:00:00.000000000 Z
11
+ date: 2020-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -80,6 +80,48 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: activesupport
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: faraday
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: oj
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
83
125
  description:
84
126
  email:
85
127
  - o.vykhor@ukr.net
@@ -107,6 +149,7 @@ files:
107
149
  - lib/youtube_parser/client.rb
108
150
  - lib/youtube_parser/config/keys.yml
109
151
  - lib/youtube_parser/resource.rb
152
+ - lib/youtube_parser/search/channels.rb
110
153
  - lib/youtube_parser/version.rb
111
154
  - youtube_parser.gemspec
112
155
  homepage: https://github.com/oleksiivykhor/youtube_parser