youtube_parser 0.1.0 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a9f07597f46f3a960b11c08eae61b02e11c270c23b4ff2c3c56cc2ee09aba4c3
4
- data.tar.gz: b274a8867a72688c69bbe1b9b4ec46a82e107be24b9201f56d6f69fbfa117170
3
+ metadata.gz: 59f991625cce87ba5921dd80698a24afb1cdbc0bf07634e2dd7a5925ae36863a
4
+ data.tar.gz: b9396016cd0de944ba992818accbfe6ad14295ee6aa7800f5117b18789f55db7
5
5
  SHA512:
6
- metadata.gz: b91bc4d8b8d39e1bbeffd081224d7806c9d7de2bafb54f561b495b2e119a74037de5c57107248face916aa863a82a8d939b57432e3b98adc620edce8904dcb5b
7
- data.tar.gz: 484bdd95c5fc999503a9641589bf1f583fb6b2a22b00a4c06fe483f05cda344d5b68e143b367cf5caaf138f79cf05e41fea84c721b4dc4882ea74c604f59f833
6
+ metadata.gz: 513b50c2132ce17ae7a11fc36eecc26b60d3b6ec268a1b85422cd7e13960068a1996a61e11a656c0a2e13dc8524ba1582065d434a9ffb804bc2a0af7701e9972
7
+ data.tar.gz: 5ab15103ef37ba712655de36396835a91247f4f0a4370da52540ca4d2b8b5e6cfb076346f210b30e6968efd885e197c521ccadd7247d01b283d405dc441fca85
data/Gemfile CHANGED
@@ -5,4 +5,4 @@ gemspec
5
5
 
6
6
  gem 'faraday'
7
7
  gem 'oj'
8
- gem 'activesupport'
8
+ gem 'activesupport'
@@ -1,7 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- youtube_parser (0.1.0)
4
+ youtube_parser (0.1.6)
5
+ activesupport
6
+ faraday
7
+ oj
5
8
 
6
9
  GEM
7
10
  remote: https://rubygems.org/
@@ -12,8 +15,8 @@ GEM
12
15
  minitest (~> 5.1)
13
16
  tzinfo (~> 1.1)
14
17
  zeitwerk (~> 2.2)
15
- addressable (2.6.0)
16
- public_suffix (>= 2.0.2, < 4.0)
18
+ addressable (2.7.0)
19
+ public_suffix (>= 2.0.2, < 5.0)
17
20
  byebug (11.0.1)
18
21
  concurrent-ruby (1.1.5)
19
22
  crack (0.4.3)
@@ -21,13 +24,13 @@ GEM
21
24
  diff-lcs (1.3)
22
25
  faraday (0.17.1)
23
26
  multipart-post (>= 1.2, < 3)
24
- hashdiff (0.3.7)
27
+ hashdiff (1.0.0)
25
28
  i18n (1.7.0)
26
29
  concurrent-ruby (~> 1.0)
27
30
  minitest (5.13.0)
28
31
  multipart-post (2.1.1)
29
32
  oj (3.9.2)
30
- public_suffix (3.0.3)
33
+ public_suffix (4.0.2)
31
34
  rake (10.5.0)
32
35
  rspec (3.9.0)
33
36
  rspec-core (~> 3.9.0)
@@ -42,14 +45,14 @@ GEM
42
45
  diff-lcs (>= 1.2.0, < 2.0)
43
46
  rspec-support (~> 3.9.0)
44
47
  rspec-support (3.9.0)
45
- safe_yaml (1.0.4)
48
+ safe_yaml (1.0.5)
46
49
  thread_safe (0.3.6)
47
50
  tzinfo (1.2.5)
48
51
  thread_safe (~> 0.1)
49
- webmock (3.0.1)
52
+ webmock (3.7.6)
50
53
  addressable (>= 2.3.6)
51
54
  crack (>= 0.3.2)
52
- hashdiff
55
+ hashdiff (>= 0.4.0, < 2.0.0)
53
56
  zeitwerk (2.2.2)
54
57
 
55
58
  PLATFORMS
data/README.md CHANGED
@@ -1,9 +1,4 @@
1
1
  # YoutubeParser
2
-
3
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/youtube_parser`. To experiment with that code, run `bin/console` for an interactive prompt.
4
-
5
- TODO: Delete this and the text above, and describe your gem
6
-
7
2
  ## Installation
8
3
 
9
4
  Add this line to your application's Gemfile:
@@ -26,14 +21,31 @@ To fetch youtube channel info:
26
21
  ```ruby
27
22
  require 'youtube_parser'
28
23
 
29
- YoutubeParser::Channel.new('https://www.youtube.com/channel/channel_id').info
24
+ user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 '\
25
+ '(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
26
+ options = { channel_url: 'https://www.youtube.com/channel/channel_id',
27
+ user_agent: user_agent }
28
+
29
+ YoutubeParser::Channel.new(options).info
30
30
  ```
31
31
 
32
- ## Development
32
+ To search channels by query:
33
+ ```ruby
34
+ require 'youtube_parser'
33
35
 
34
- After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
36
+ user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 '\
37
+ '(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
38
+ options = { search_query: 'search query',
39
+ user_agent: user_agent }
35
40
 
36
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
41
+ # Search with pagination
42
+ YoutubeParser::Search::Channels.new(options).for_each_channel do |channel|
43
+ channel
44
+ end
45
+
46
+ # First page channels
47
+ YoutubeParser::Search::Channels.new(options).info
48
+ ```
37
49
 
38
50
  ## Contributing
39
51
 
@@ -1,12 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'active_support/core_ext'
4
- require 'uri'
5
3
  require 'oj'
4
+ require 'uri'
6
5
  require 'faraday'
7
6
  require 'ostruct'
8
7
  require 'yaml'
9
8
  require 'pathname'
9
+ require 'active_support'
10
+ require 'active_support/core_ext'
10
11
 
11
12
  require 'youtube_parser/version'
12
13
  require 'youtube_parser/resource'
@@ -15,6 +16,7 @@ require 'youtube_parser/base_parser'
15
16
  require 'youtube_parser/channel'
16
17
  require 'youtube_parser/channels/about_section'
17
18
  require 'youtube_parser/channels/videos_section'
19
+ require 'youtube_parser/search/channels'
18
20
 
19
21
  module YoutubeParser
20
22
  def self.root
@@ -12,6 +12,7 @@ module YoutubeParser
12
12
 
13
13
  def collect_channel_info
14
14
  data = about_section_info.merge(video_section_info)
15
+ data[:channel_url] = channel_url
15
16
  data.select! { |_, v| v.present? }
16
17
  end
17
18
 
@@ -23,6 +24,15 @@ module YoutubeParser
23
24
  @video_section_info ||= section(:videos).info
24
25
  end
25
26
 
27
+ def channel_url
28
+ uri = URI(client.class::BASE_URL)
29
+ uri.path = options.channel_url
30
+
31
+ uri.to_s
32
+ rescue URI::InvalidComponentError
33
+ options.channel_url
34
+ end
35
+
26
36
  def section(title)
27
37
  constant_name = "YoutubeParser::Channels::#{title.capitalize}Section"
28
38
  opts = { channel_url: options.channel_url, client: client }
@@ -8,6 +8,7 @@ module YoutubeParser
8
8
  def info
9
9
  {
10
10
  title: title,
11
+ email: email,
11
12
  description: description,
12
13
  country: country,
13
14
  tags: tags,
@@ -24,6 +25,11 @@ module YoutubeParser
24
25
  section.dig(*keys.title)
25
26
  end
26
27
 
28
+ def email
29
+ regex = /#{URI::MailTo::EMAIL_REGEXP.source.gsub(/\\A|\\z/, '')}/
30
+ description[regex]
31
+ end
32
+
27
33
  def description
28
34
  descriptions = [section.dig(*keys.description_first),
29
35
  section.dig(*keys.description_second)]
@@ -18,13 +18,16 @@ module YoutubeParser
18
18
  return @video_ids if @video_ids&.any?
19
19
 
20
20
  sections.each do |section|
21
- videos = section.dig(*keys.video_section_tabs)
21
+ videos = section.dig(*keys.video_section_tabs) ||
22
+ section.dig(*keys.second_video_section)
22
23
  next unless videos.is_a? Array
23
24
 
24
- @video_ids = videos.map { |v| v.dig(*keys.video_ids) }.compact
25
+ @video_ids = scrape_video_ids videos
25
26
 
26
27
  return @video_ids if @video_ids.any?
27
28
  end
29
+
30
+ []
28
31
  end
29
32
 
30
33
  def playlist_id
@@ -34,6 +37,10 @@ module YoutubeParser
34
37
  @playlist_id = section&.dig(*keys.playlist_id)
35
38
  end
36
39
 
40
+ def scrape_video_ids(videos)
41
+ videos.map { |video| video.dig(*keys.video_ids) }.compact
42
+ end
43
+
37
44
  def sections
38
45
  @sections ||= json.dig(*keys.section_tabs) || []
39
46
  end
@@ -13,6 +13,8 @@ module YoutubeParser
13
13
 
14
14
  def get(endpoint, options = {})
15
15
  response = client.get(endpoint, options)
16
+ return {} unless response.status.eql?(200)
17
+
16
18
  json_str = response.body[DATA_REGEX, 1]
17
19
 
18
20
  get_json json_str
@@ -24,6 +26,7 @@ module YoutubeParser
24
26
  opts = { ssl: { verify: false }, request: { timeout: 10 } }
25
27
  @client ||= Faraday.new(BASE_URL, opts) do |request|
26
28
  request.adapter Faraday.default_adapter
29
+ request.headers['Accept-Language'] = 'en-US'
27
30
  request.headers['User-Agent'] = options.user_agent if options.user_agent
28
31
  request.proxy = proxy if options.proxy
29
32
  end
@@ -11,6 +11,9 @@ playlist_id: ['tabRenderer', 'content', 'sectionListRenderer', 'subMenu',
11
11
  'channelSubMenuRenderer', 'playAllButton', 'buttonRenderer',
12
12
  'navigationEndpoint', 'watchPlaylistEndpoint', 'playlistId']
13
13
  video_ids: ['gridVideoRenderer', 'videoId']
14
+ second_video_section: ['tabRenderer', 'content', 'sectionListRenderer', 'contents',
15
+ 0, 'itemSectionRenderer', 'contents', 0, 'shelfRenderer', 'content',
16
+ 'horizontalListRenderer', 'items']
14
17
  country: ['country', 'simpleText']
15
18
  description_first: ['description', 'simpleText']
16
19
  description_second: ['artistBio', 'simpleText']
@@ -21,4 +24,12 @@ views: ['viewCountText', 'runs']
21
24
  keywords: ['metadata', 'channelMetadataRenderer', 'keywords']
22
25
  tab_renderer: ['tabRenderer']
23
26
  title: ['title', 'simpleText']
24
- statistics: ['networkStatistics', 'statistics']
27
+ statistics: ['networkStatistics', 'statistics']
28
+ channels_page: ['contents', 'twoColumnSearchResultsRenderer', 'primaryContents',
29
+ 'sectionListRenderer', 'contents']
30
+ continuation_contents: ['continuationContents', 'itemSectionContinuation']
31
+ channels_contents: ['itemSectionRenderer', 'contents']
32
+ channel_renderer: ['channelRenderer']
33
+ continuations: ['continuations']
34
+ renderer_continuations: ['itemSectionRenderer', 'continuations']
35
+ continuation: ['nextContinuationData', 'continuation']
@@ -13,6 +13,10 @@ module YoutubeParser
13
13
  @attributes ||= @hash.transform_keys(&:to_sym)
14
14
  end
15
15
 
16
+ def keys
17
+ @keys ||= attributes.keys
18
+ end
19
+
16
20
  def method_missing(method, *args, &block)
17
21
  super_method = super
18
22
  return super_method if super_method
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ module YoutubeParser
4
+ module Search
5
+ class Channels < BaseParser
6
+ DEFAULT_PARAMS = { sp: 'EgIQAg%3D%3D' }.freeze
7
+
8
+ options :search_query
9
+
10
+ def info
11
+ (channels(search)[:channels] || []).map { |c| channel_info c }
12
+ end
13
+
14
+ def for_each_channel
15
+ for_each_page do |channels|
16
+ channels.each { |channel| yield channel_info channel }
17
+ end
18
+ end
19
+
20
+ private
21
+
22
+ def for_each_page
23
+ channels = channels(search)
24
+ return if channels.empty? || channels[:channels].empty?
25
+
26
+ loop do
27
+ yield channels[:channels]
28
+
29
+ break unless channels[:continuation]
30
+
31
+ channels = channels(search(ctoken: channels[:continuation]))
32
+
33
+ break if channels.empty?
34
+ end
35
+ end
36
+
37
+ def search(opts = {})
38
+ opts = opts.merge(params)
39
+ client.get('results', opts)
40
+ end
41
+
42
+ def channels(json)
43
+ page_contents = json.dig(*keys.channels_page)
44
+ continuation_contents = json.dig(*keys.continuation_contents)
45
+ return {} if page_contents.nil? && continuation_contents.nil?
46
+
47
+ contents = (page_contents || [continuation_contents&.dig('contents')])
48
+ contents.each do |content|
49
+ channels = scrape_channels content
50
+ channels_hash = {
51
+ channels: channels,
52
+ continuation: continuation(continuation_contents, content)
53
+ }
54
+
55
+ return channels_hash if channels.any?
56
+ end
57
+
58
+ {}
59
+ end
60
+
61
+ def continuation(contents, content)
62
+ continuations = content.dig(*keys.renderer_continuations)
63
+ continuations = contents&.dig(*keys.continuations) if contents
64
+ continuations&.map do |cont|
65
+ cont.dig(*keys.continuation)
66
+ end&.compact&.first
67
+ end
68
+
69
+ def scrape_channels(content)
70
+ contents = content
71
+ contents = content.dig(*keys.channels_contents) if content.is_a? Hash
72
+ contents&.map { |c| c.dig(*keys.channel_renderer) }&.compact || []
73
+ end
74
+
75
+ def channel_info(channel)
76
+ channel_url = channel.dig(*keys.channel_url)
77
+ opts = { channel_url: channel_url, client: client }
78
+
79
+ YoutubeParser::Channel.new(opts).info
80
+ end
81
+
82
+ def params
83
+ options.attributes.slice(*required_options).merge(DEFAULT_PARAMS)
84
+ end
85
+ end
86
+ end
87
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module YoutubeParser
4
- VERSION = '0.1.0'
4
+ VERSION = '0.1.6'
5
5
  end
@@ -27,4 +27,7 @@ Gem::Specification.new do |spec|
27
27
  spec.add_development_dependency 'rspec', '~> 3.0'
28
28
  spec.add_development_dependency 'byebug'
29
29
  spec.add_development_dependency 'webmock'
30
+ spec.add_dependency 'activesupport'
31
+ spec.add_dependency 'faraday'
32
+ spec.add_dependency 'oj'
30
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: youtube_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - o.vykhor
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-12-31 00:00:00.000000000 Z
11
+ date: 2020-01-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -80,6 +80,48 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: activesupport
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: faraday
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: oj
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
83
125
  description:
84
126
  email:
85
127
  - o.vykhor@ukr.net
@@ -107,6 +149,7 @@ files:
107
149
  - lib/youtube_parser/client.rb
108
150
  - lib/youtube_parser/config/keys.yml
109
151
  - lib/youtube_parser/resource.rb
152
+ - lib/youtube_parser/search/channels.rb
110
153
  - lib/youtube_parser/version.rb
111
154
  - youtube_parser.gemspec
112
155
  homepage: https://github.com/oleksiivykhor/youtube_parser