youtube_parser 0.1.1 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2787d7682c42086c3586dd86a774760b0ee78f314ab297cc06e669cd1c5b8167
4
- data.tar.gz: 537a9cdf529e9b5760661aa75188bfc0990d82402de42776f9ebd6786394bc8c
3
+ metadata.gz: f903b865d02d0e7cd331a737f268a5203669cdce1b1b64ce39c31ea82f941385
4
+ data.tar.gz: e363de69b04ce8a78f172b71dd5e9c2c033bebae040dc7e8100b8278a6f42a51
5
5
  SHA512:
6
- metadata.gz: c131bee117f67f0ad2735d6b5ef82ccd68265db53564fc98f1cbb6f090c954983c7de7470628992757be9ab50b76fa6549d511157441e689322477d7d2d4dbd2
7
- data.tar.gz: b31abce91b7e2a2773f162143aabf96dd71563c24f5e4350e40750e95089491ca4f496c7d44e2a0bea85d4fd0ec51b1088a38ce472864e5d6beaec226cb63b4a
6
+ metadata.gz: e0860723613a734b3e84efeea7315fa59b8a778517f421c365af4381e8120f37d059529656a717339be14f491eb3b85991a8d23432f772529a95fadd19cf9c77
7
+ data.tar.gz: 7690928cbbbed44014abea0ed85743f5614b58d64143800efa76b614e9f6963f2d55d1599b9e7008676caf75cedabd9544edaa630dc35857491449e79f793bb3
data/Gemfile CHANGED
@@ -5,4 +5,4 @@ gemspec
5
5
 
6
6
  gem 'faraday'
7
7
  gem 'oj'
8
- gem 'activesupport'
8
+ gem 'activesupport'
@@ -1,7 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- youtube_parser (0.1.1)
4
+ youtube_parser (0.1.7)
5
+ activesupport
6
+ faraday
7
+ oj
5
8
 
6
9
  GEM
7
10
  remote: https://rubygems.org/
@@ -12,8 +15,8 @@ GEM
12
15
  minitest (~> 5.1)
13
16
  tzinfo (~> 1.1)
14
17
  zeitwerk (~> 2.2)
15
- addressable (2.6.0)
16
- public_suffix (>= 2.0.2, < 4.0)
18
+ addressable (2.7.0)
19
+ public_suffix (>= 2.0.2, < 5.0)
17
20
  byebug (11.0.1)
18
21
  concurrent-ruby (1.1.5)
19
22
  crack (0.4.3)
@@ -21,13 +24,13 @@ GEM
21
24
  diff-lcs (1.3)
22
25
  faraday (0.17.1)
23
26
  multipart-post (>= 1.2, < 3)
24
- hashdiff (0.3.7)
27
+ hashdiff (1.0.0)
25
28
  i18n (1.7.0)
26
29
  concurrent-ruby (~> 1.0)
27
30
  minitest (5.13.0)
28
31
  multipart-post (2.1.1)
29
32
  oj (3.9.2)
30
- public_suffix (3.0.3)
33
+ public_suffix (4.0.2)
31
34
  rake (10.5.0)
32
35
  rspec (3.9.0)
33
36
  rspec-core (~> 3.9.0)
@@ -42,14 +45,14 @@ GEM
42
45
  diff-lcs (>= 1.2.0, < 2.0)
43
46
  rspec-support (~> 3.9.0)
44
47
  rspec-support (3.9.0)
45
- safe_yaml (1.0.4)
48
+ safe_yaml (1.0.5)
46
49
  thread_safe (0.3.6)
47
50
  tzinfo (1.2.5)
48
51
  thread_safe (~> 0.1)
49
- webmock (3.0.1)
52
+ webmock (3.7.6)
50
53
  addressable (>= 2.3.6)
51
54
  crack (>= 0.3.2)
52
- hashdiff
55
+ hashdiff (>= 0.4.0, < 2.0.0)
53
56
  zeitwerk (2.2.2)
54
57
 
55
58
  PLATFORMS
data/README.md CHANGED
@@ -29,11 +29,23 @@ options = { channel_url: 'https://www.youtube.com/channel/channel_id',
29
29
  YoutubeParser::Channel.new(options).info
30
30
  ```
31
31
 
32
- ## Development
32
+ To search channels by query:
33
+ ```ruby
34
+ require 'youtube_parser'
33
35
 
34
- After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
36
+ user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 '\
37
+ '(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
38
+ options = { search_query: 'search query',
39
+ user_agent: user_agent }
40
+
41
+ # Search with pagination
42
+ YoutubeParser::Search::Channels.new(options).for_each_channel do |channel|
43
+ channel
44
+ end
35
45
 
36
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
46
+ # First page channels
47
+ YoutubeParser::Search::Channels.new(options).info
48
+ ```
37
49
 
38
50
  ## Contributing
39
51
 
@@ -1,12 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'active_support/core_ext'
4
- require 'uri'
5
3
  require 'oj'
4
+ require 'uri'
6
5
  require 'faraday'
7
6
  require 'ostruct'
8
7
  require 'yaml'
9
8
  require 'pathname'
9
+ require 'active_support'
10
+ require 'active_support/core_ext'
10
11
 
11
12
  require 'youtube_parser/version'
12
13
  require 'youtube_parser/resource'
@@ -15,9 +16,11 @@ require 'youtube_parser/base_parser'
15
16
  require 'youtube_parser/channel'
16
17
  require 'youtube_parser/channels/about_section'
17
18
  require 'youtube_parser/channels/videos_section'
19
+ require 'youtube_parser/search/channels'
18
20
 
19
21
  module YoutubeParser
20
22
  def self.root
21
- Pathname.new(Dir.pwd)
23
+ path = Gem::Specification.find_by_name('youtube_parser').gem_dir
24
+ Pathname.new(path)
22
25
  end
23
26
  end
@@ -12,6 +12,7 @@ module YoutubeParser
12
12
 
13
13
  def collect_channel_info
14
14
  data = about_section_info.merge(video_section_info)
15
+ data[:channel_url] = channel_url
15
16
  data.select! { |_, v| v.present? }
16
17
  end
17
18
 
@@ -23,6 +24,15 @@ module YoutubeParser
23
24
  @video_section_info ||= section(:videos).info
24
25
  end
25
26
 
27
+ def channel_url
28
+ uri = URI(client.class::BASE_URL)
29
+ uri.path = options.channel_url
30
+
31
+ uri.to_s
32
+ rescue URI::InvalidComponentError
33
+ options.channel_url
34
+ end
35
+
26
36
  def section(title)
27
37
  constant_name = "YoutubeParser::Channels::#{title.capitalize}Section"
28
38
  opts = { channel_url: options.channel_url, client: client }
@@ -8,6 +8,7 @@ module YoutubeParser
8
8
  def info
9
9
  {
10
10
  title: title,
11
+ email: email,
11
12
  description: description,
12
13
  country: country,
13
14
  tags: tags,
@@ -18,13 +18,16 @@ module YoutubeParser
18
18
  return @video_ids if @video_ids&.any?
19
19
 
20
20
  sections.each do |section|
21
- videos = section.dig(*keys.video_section_tabs)
21
+ videos = section.dig(*keys.video_section_tabs) ||
22
+ section.dig(*keys.second_video_section)
22
23
  next unless videos.is_a? Array
23
24
 
24
- @video_ids = videos.map { |v| v.dig(*keys.video_ids) }.compact
25
+ @video_ids = scrape_video_ids videos
25
26
 
26
27
  return @video_ids if @video_ids.any?
27
28
  end
29
+
30
+ []
28
31
  end
29
32
 
30
33
  def playlist_id
@@ -34,6 +37,10 @@ module YoutubeParser
34
37
  @playlist_id = section&.dig(*keys.playlist_id)
35
38
  end
36
39
 
40
+ def scrape_video_ids(videos)
41
+ videos.map { |video| video.dig(*keys.video_ids) }.compact
42
+ end
43
+
37
44
  def sections
38
45
  @sections ||= json.dig(*keys.section_tabs) || []
39
46
  end
@@ -13,6 +13,8 @@ module YoutubeParser
13
13
 
14
14
  def get(endpoint, options = {})
15
15
  response = client.get(endpoint, options)
16
+ return {} unless response.status.eql?(200)
17
+
16
18
  json_str = response.body[DATA_REGEX, 1]
17
19
 
18
20
  get_json json_str
@@ -24,6 +26,7 @@ module YoutubeParser
24
26
  opts = { ssl: { verify: false }, request: { timeout: 10 } }
25
27
  @client ||= Faraday.new(BASE_URL, opts) do |request|
26
28
  request.adapter Faraday.default_adapter
29
+ request.headers['Accept-Language'] = 'en-US'
27
30
  request.headers['User-Agent'] = options.user_agent if options.user_agent
28
31
  request.proxy = proxy if options.proxy
29
32
  end
@@ -11,6 +11,9 @@ playlist_id: ['tabRenderer', 'content', 'sectionListRenderer', 'subMenu',
11
11
  'channelSubMenuRenderer', 'playAllButton', 'buttonRenderer',
12
12
  'navigationEndpoint', 'watchPlaylistEndpoint', 'playlistId']
13
13
  video_ids: ['gridVideoRenderer', 'videoId']
14
+ second_video_section: ['tabRenderer', 'content', 'sectionListRenderer', 'contents',
15
+ 0, 'itemSectionRenderer', 'contents', 0, 'shelfRenderer', 'content',
16
+ 'horizontalListRenderer', 'items']
14
17
  country: ['country', 'simpleText']
15
18
  description_first: ['description', 'simpleText']
16
19
  description_second: ['artistBio', 'simpleText']
@@ -21,4 +24,12 @@ views: ['viewCountText', 'runs']
21
24
  keywords: ['metadata', 'channelMetadataRenderer', 'keywords']
22
25
  tab_renderer: ['tabRenderer']
23
26
  title: ['title', 'simpleText']
24
- statistics: ['networkStatistics', 'statistics']
27
+ statistics: ['networkStatistics', 'statistics']
28
+ channels_page: ['contents', 'twoColumnSearchResultsRenderer', 'primaryContents',
29
+ 'sectionListRenderer', 'contents']
30
+ continuation_contents: ['continuationContents', 'itemSectionContinuation']
31
+ channels_contents: ['itemSectionRenderer', 'contents']
32
+ channel_renderer: ['channelRenderer']
33
+ continuations: ['continuations']
34
+ renderer_continuations: ['itemSectionRenderer', 'continuations']
35
+ continuation: ['nextContinuationData', 'continuation']
@@ -13,6 +13,10 @@ module YoutubeParser
13
13
  @attributes ||= @hash.transform_keys(&:to_sym)
14
14
  end
15
15
 
16
+ def keys
17
+ @keys ||= attributes.keys
18
+ end
19
+
16
20
  def method_missing(method, *args, &block)
17
21
  super_method = super
18
22
  return super_method if super_method
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ module YoutubeParser
4
+ module Search
5
+ class Channels < BaseParser
6
+ DEFAULT_PARAMS = { sp: 'EgIQAg%3D%3D' }.freeze
7
+
8
+ options :search_query
9
+
10
+ def info
11
+ (channels(search)[:channels] || []).map { |c| channel_info c }
12
+ end
13
+
14
+ def for_each_channel
15
+ for_each_page do |channels|
16
+ channels.each { |channel| yield channel_info channel }
17
+ end
18
+ end
19
+
20
+ private
21
+
22
+ def for_each_page
23
+ channels = channels(search)
24
+ return if channels.empty? || channels[:channels].empty?
25
+
26
+ loop do
27
+ yield channels[:channels]
28
+
29
+ break unless channels[:continuation]
30
+
31
+ channels = channels(search(ctoken: channels[:continuation]))
32
+
33
+ break if channels.empty?
34
+ end
35
+ end
36
+
37
+ def search(opts = {})
38
+ opts = opts.merge(params)
39
+ client.get('results', opts)
40
+ end
41
+
42
+ def channels(json)
43
+ page_contents = json.dig(*keys.channels_page)
44
+ continuation_contents = json.dig(*keys.continuation_contents)
45
+ return {} if page_contents.nil? && continuation_contents.nil?
46
+
47
+ contents = (page_contents || [continuation_contents&.dig('contents')])
48
+ contents.each do |content|
49
+ channels = scrape_channels content
50
+ channels_hash = {
51
+ channels: channels,
52
+ continuation: continuation(continuation_contents, content)
53
+ }
54
+
55
+ return channels_hash if channels.any?
56
+ end
57
+
58
+ {}
59
+ end
60
+
61
+ def continuation(contents, content)
62
+ continuations = content.dig(*keys.renderer_continuations)
63
+ continuations = contents&.dig(*keys.continuations) if contents
64
+ continuations&.map do |cont|
65
+ cont.dig(*keys.continuation)
66
+ end&.compact&.first
67
+ end
68
+
69
+ def scrape_channels(content)
70
+ contents = content
71
+ contents = content.dig(*keys.channels_contents) if content.is_a? Hash
72
+ contents&.map { |c| c.dig(*keys.channel_renderer) }&.compact || []
73
+ end
74
+
75
+ def channel_info(channel)
76
+ channel_url = channel.dig(*keys.channel_url)
77
+ opts = { channel_url: channel_url, client: client }
78
+
79
+ YoutubeParser::Channel.new(opts).info
80
+ end
81
+
82
+ def params
83
+ options.attributes.slice(*required_options).merge(DEFAULT_PARAMS)
84
+ end
85
+ end
86
+ end
87
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module YoutubeParser
4
- VERSION = '0.1.1'
4
+ VERSION = '0.1.7'
5
5
  end
@@ -27,4 +27,7 @@ Gem::Specification.new do |spec|
27
27
  spec.add_development_dependency 'rspec', '~> 3.0'
28
28
  spec.add_development_dependency 'byebug'
29
29
  spec.add_development_dependency 'webmock'
30
+ spec.add_dependency 'activesupport'
31
+ spec.add_dependency 'faraday'
32
+ spec.add_dependency 'oj'
30
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: youtube_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - o.vykhor
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-01-01 00:00:00.000000000 Z
11
+ date: 2020-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -80,6 +80,48 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: activesupport
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: faraday
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: oj
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
83
125
  description:
84
126
  email:
85
127
  - o.vykhor@ukr.net
@@ -107,6 +149,7 @@ files:
107
149
  - lib/youtube_parser/client.rb
108
150
  - lib/youtube_parser/config/keys.yml
109
151
  - lib/youtube_parser/resource.rb
152
+ - lib/youtube_parser/search/channels.rb
110
153
  - lib/youtube_parser/version.rb
111
154
  - youtube_parser.gemspec
112
155
  homepage: https://github.com/oleksiivykhor/youtube_parser