natalia 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: a1cd40e71255d902fb12309ee83888b4ea78b322baf87caaaaa8aaafca0d3886
4
+ data.tar.gz: de2c722d139e874be4675975a909d745182e73bdf5e41cd6f2542b71e53dc197
5
+ SHA512:
6
+ metadata.gz: 4f65cc234804d15401dc4902b9dcd87804408cd3c97cfae2c8e17d9044768725781c14d14fdf75b0c467916da795ce5cfaee9b77a4fe548cbb9a5d63684d48dc
7
+ data.tar.gz: 583b39aefe55add76fb4af7c25c395ab6e2ab6363fa0e7e81b0a246c957be50087cab8dd48442496d001a2c6e17e16bb61fdb48463e54127335ff829ae8faca7
data/.rubocop.yml ADDED
@@ -0,0 +1,45 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.6
3
+
4
+ Metrics/AbcSize:
5
+ Max: 170
6
+
7
+ Metrics/BlockLength:
8
+ Max: 250
9
+
10
+ Metrics/ClassLength:
11
+ Max: 1000
12
+
13
+ Metrics/CyclomaticComplexity:
14
+ Max: 70
15
+
16
+ Metrics/MethodLength:
17
+ Max: 100
18
+
19
+ Metrics/ModuleLength:
20
+ Max: 1000
21
+
22
+ Metrics/ParameterLists:
23
+ Max: 50
24
+
25
+ Metrics/PerceivedComplexity:
26
+ Max: 80
27
+
28
+ Layout/SpaceInsideBlockBraces:
29
+ EnforcedStyle: no_space
30
+ SpaceBeforeBlockParameters: false
31
+
32
+ Layout/LineLength:
33
+ Max: 180
34
+
35
+ Style/Documentation:
36
+ Enabled: false
37
+
38
+ Style/NumericPredicate:
39
+ EnforcedStyle: comparison
40
+
41
+ Style/MultilineBlockChain:
42
+ Enabled: false
43
+
44
+ Style/SpecialGlobalVars:
45
+ EnforcedStyle: use_perl_names
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ # Specify your gem's dependencies in natalia.gemspec
6
+ gemspec
7
+
8
+ gem "rake", "~> 13.0"
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2023 Ishotihadus
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ # Natalia
2
+
3
+ 日本語の歌詞掲載サイトから歌詞を取得するライブラリです。
4
+
5
+ ## Installation
6
+
7
+ Install the gem and add to the application's Gemfile by executing:
8
+
9
+ $ bundle add natalia
10
+
11
+ If bundler is not being used to manage dependencies, install the gem by executing:
12
+
13
+ $ gem install natalia
14
+
15
+ ## Usage
16
+
17
+ ```rb
18
+ require 'natalia'
19
+
20
+ # 歌詞情報のエントリを取ってくる
21
+ songs = Natalia.search_by_title('永遠の花')
22
+
23
+ # 歌詞の取得
24
+ songs.first.get.lyrics
25
+ # => "窓あけたら 花瓶の花が\n風に誘われて 揺れたよ\nそう..."
26
+ ```
27
+
28
+ ## Contributing
29
+
30
+ Bug reports and pull requests are welcome on GitHub at https://github.com/Ishotihadus/natalia.
31
+
32
+ ## License
33
+
34
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ task default: %i[]
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require_relative '../utils'
5
+
6
module Natalia
  # Client for the JOYSOUND endpoints on mspxy.joysound.com: song search
  # (Common/ContentsList) and lyric retrieval (Common/Lyric).
  module Joysound
    # Public sort symbol => [sort field, order] pair sent to the API.
    SORT_MAP = {
      title: %w[name asc].freeze,
      title_desc: %w[name desc].freeze,
      popularity: %w[popular asc].freeze,
      popularity_desc: %w[popular desc].freeze,
      date: %w[new asc].freeze,
      date_desc: %w[new desc].freeze,
      artist: %w[artist asc].freeze,
      artist_desc: %w[artist desc].freeze
    }.freeze

    # Public search type => value of the `kind1` form field.
    KIND_MAP = {
      title: 'song',
      artist: 'selArtist',
      service: 'selService' # presumably the karaoke machine/service model -- unconfirmed
    }.freeze

    # Searches the JOYSOUND catalogue.
    #
    # @param keyword [String] search word (sent as `word1`)
    # @param type [Symbol] one of the KIND_MAP keys; :title matches partially,
    #   every other type matches exactly
    # @param sort [Symbol] one of the SORT_MAP keys
    # @return [Array<Hash>] one hash per song (:source, :id, :title, :artist,
    #   :artist_id, :lyricist, :composer, :raw_data); empty when the response
    #   carries no `contentsList`
    # @raise [ArgumentError] when `sort` or `type` is not a known key
    def self.search(keyword, type: :title, sort: :popularity_desc)
      sort_field, order = SORT_MAP[sort]
      raise ArgumentError, 'invalid sort type' unless sort_field

      # Fail fast instead of sending a request with an empty `kind1`.
      kind = KIND_MAP[type]
      raise ArgumentError, 'invalid search type' unless kind

      form = {
        format: 'all',
        kindCnt: '1', # apparently the number of kindN/wordN/matchN triples supplied
        start: '1',
        count: '999',
        sort: sort_field,
        order: order,
        kind1: kind,
        word1: keyword,
        match1: type == :title ? 'partial' : 'exact', # partial / front / exact
        apiVer: '1.0'
      }
      response = post_form('https://mspxy.joysound.com/Common/ContentsList', form)

      response.value # raises on a non-2xx status
      json = JSON.parse(response.body.force_encoding('utf-8'))

      (json['contentsList'] || [])
        .select {|e| lyrics_available?(e)}
        .map {|e| song_fields(e).merge(raw_data: e)}
    end

    # Fetches one song, including its lyrics, by navi group id.
    #
    # @param id [String] the `:id` value returned by {search}
    # @return [Hash, nil] the song hash with :lyrics and :raw_data added, or nil
    #   when the server answers 404; :lyrics is nil when no lyric entry has
    #   statusCode '1'
    def self.get(id)
      form = {
        kind: 'naviGroupId',
        selSongNo: id,
        interactionFlg: '0',
        apiVer: '1.0'
      }
      response = post_form('https://mspxy.joysound.com/Common/Lyric', form)

      return nil if response.code == '404'

      response.value # raises on any other non-2xx status
      json = JSON.parse(response.body.force_encoding('utf-8'))

      lyric_entry = json['lyricList']&.find {|e| e['statusCode'] == '1'}
      song_fields(json).merge(lyrics: lyric_entry&.[]('lyric')&.strip, raw_data: json)
    end

    # POSTs `form` url-encoded to `url` with the headers both endpoints share.
    def self.post_form(url, form)
      Natalia::Utils.curl_post(url) do |request|
        request.content_type = 'application/x-www-form-urlencoded; charset=UTF-8'
        request['X-Jsp-App-Name'] = '0000800'
        request.set_form_data(form)
      end
    end

    # True when the song has a service type other than the two listed here.
    # Empirically (unconfirmed) lyrics are only available for such songs.
    # A missing service list counts as "not available".
    def self.lyrics_available?(song)
      Array(song['serviceTypeList']).any? do |service|
        service['serviceType'] != '001000000' && service['serviceType'] != '100000000'
      end
    end

    # Fields common to search results and lyric lookups.
    def self.song_fields(data)
      {
        source: self,
        id: data['naviGroupId'],
        title: data['songName'],
        artist: data['artistName'],
        artist_id: data['artistId'],
        lyricist: data['lyricist'],
        composer: data['composer']
      }
    end

    private_class_method :post_form, :lyrics_available?, :song_fields
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require_relative '../utils'
5
+
6
module Natalia
  # Scraper for the song listings and song pages of www.uta-net.com.
  module Utamap
    # Public sort symbol => numeric sort code used in uta-net URLs.
    SORT_MAP = {
      title: 1,
      title_desc: 2,
      popularity: 3,
      popularity_desc: 4,
      date: 5,
      date_desc: 6,
      artist: 7,
      artist_desc: 8
    }.freeze

    # Searches uta-net.
    #
    # A :title search walks the paginated result list until the server answers
    # 404 or a page contains no song rows. Every other type fetches the single
    # listing page of the given id.
    #
    # @param keyword [String] search keyword for :title; the site's id for the
    #   other types
    # @param type [Symbol] :title, :artist, :lyricist, :composer or :arranger
    # @param sort [Symbol] one of the SORT_MAP keys
    # @return [Array<Hash>] one hash per song row (:source, :id, :title,
    #   :artist, :artist_id, :lyricist, :lyricist_id, :composer, :composer_id,
    #   :arranger, :arranger_id, :lyrics); values are nil for empty cells
    # @raise [ArgumentError] when `sort` or `type` is invalid
    def self.search(keyword, type: :title, sort: :popularity_desc)
      sort = SORT_MAP[sort]
      raise ArgumentError, 'invalid sort type' unless sort
      raise ArgumentError, 'type must be :title, :artist, :lyricist, :composer, or :arranger' unless %i[title artist lyricist composer arranger].include?(type)

      unless type == :title
        response = Natalia::Utils.curl_get("https://www.uta-net.com/#{type}/#{keyword}/#{sort}/")
        return [] if response.code == '404'

        response.value # raises on any other non-2xx status
        return parse_song_rows(response.body)
      end

      entries = []
      (1..).each do |page|
        response = Natalia::Utils.curl_get('https://www.uta-net.com/search/', {Keyword: keyword, Aselect: 2, Bselect: 3, sort: sort, pnum: page})
        break if response.code == '404'

        response.value
        rows = parse_song_rows(response.body)
        # A page without rows means we ran past the last page; without this
        # guard a server that keeps answering 200 would never end the loop.
        break if rows.empty?

        entries.concat(rows)
      end
      entries
    end

    # Fetches a song page and extracts its metadata and lyrics.
    #
    # @param id [String, Integer] the song id (the `:id` returned by {search})
    # @return [Hash] :source, :title, :artist, :artist_id, :lyrics, plus
    #   :lyricist/:composer/:arranger and their `_id` keys when the page links them
    # @raise [Net::HTTPExceptions] on a non-2xx status (via `response.value`)
    def self.get(id)
      response = Natalia::Utils.curl_get("https://www.uta-net.com/song/#{id}/")
      response.value

      doc = Nokogiri::HTML.parse(response.body)

      ret = {
        source: self,
        title: doc.at_css('div.song-infoboard h2').content,
        artist: doc.at_css('div.song-infoboard span[itemprop="byArtist name"]').content,
        artist_id: id_from(doc.at_css('div.song-infoboard a[itemprop="byArtist"]'), 'artist')
      }

      # Credits are optional on the page; keys are only added when linked.
      %w[lyricist composer arranger].each do |role|
        anchor = doc.at_css(%(div.song-infoboard a[itemprop="#{role}"]))
        next unless anchor

        ret[role.to_sym] = anchor.content
        ret[:"#{role}_id"] = id_from(anchor, role)
      end

      # Lyrics are text nodes separated by <br>; turn each <br> into a newline.
      ret[:lyrics] = doc.at_css('div#kashi_area').children.map do |e|
        e.name == 'br' ? "\n" : e.content
      end.join.strip

      ret
    end

    # Parses one listing page; returns [] when the results table is absent.
    def self.parse_song_rows(html)
      tbody = Nokogiri::HTML.parse(html).at_css('tbody.songlist-table-body')
      return [] unless tbody

      tbody.css('tr').map {|tr| parse_song_row(tr)}.compact
    end

    # Builds the entry for one table row, or nil when the row has no
    # /song/<id>/ link in its first cell (i.e. it is not a song row).
    def self.parse_song_row(row)
      cells = row.css('td')
      song_id = id_from(cells[0]&.at_css('a'), 'song')
      return nil unless song_id

      entry = {source: self, id: song_id, title: row.at_css('span.songlist-title')&.content}
      # Cells 1..4 hold the artist, lyricist, composer and arranger links.
      %w[artist lyricist composer arranger].each_with_index do |role, index|
        anchor = cells[index + 1]&.at_css('a')
        entry[role.to_sym] = anchor&.content
        entry[:"#{role}_id"] = id_from(anchor, role)
      end
      entry[:lyrics] = cells[5]&.at_css('span.pc-utaidashi')&.content
      entry
    end

    # Extracts the numeric id from an anchor whose href starts with
    # "/<kind>/<digits>/"; nil when the anchor is missing or does not match.
    def self.id_from(anchor, kind)
      anchor && anchor['href'].to_s[%r{\A/#{kind}/(\d+)/}, 1]
    end

    private_class_method :parse_song_rows, :parse_song_row, :id_from
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'net/http'
4
+
5
module Natalia
  # Small Net::HTTP helpers shared by the service scrapers.
  module Utils
    # Browser-like user agent sent with every request.
    USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.0.0'

    # Performs a GET request.
    #
    # @param url [String] target URL; may already contain a query string
    # @param params [Hash] extra query parameters appended after the existing ones
    # @yield [request] the Net::HTTP::Get object, for header customisation
    # @return [Net::HTTPResponse] the raw response (status is not checked here)
    def self.curl_get(url, params = {}, &block)
      uri = URI(url)
      # Round-trip params through the form encoder so they become plain
      # [name, value] string pairs that can be appended to the existing ones.
      pairs = URI.decode_www_form(uri.query || '') + URI.decode_www_form(URI.encode_www_form(params))
      # Leave the query unset when there is nothing to send; assigning ''
      # would make the request line end in a stray "?".
      uri.query = pairs.empty? ? nil : URI.encode_www_form(pairs)

      dispatch(uri, Net::HTTP::Get.new(uri), &block)
    end

    # Performs a POST request. The body is expected to be set by the block.
    #
    # @param url [String] target URL
    # @yield [request] the Net::HTTP::Post object, for headers and body
    # @return [Net::HTTPResponse] the raw response (status is not checked here)
    def self.curl_post(url, &block)
      uri = URI(url)
      dispatch(uri, Net::HTTP::Post.new(uri), &block)
    end

    # Applies the default user agent, lets the caller adjust the request, then
    # sends it over a fresh connection (TLS when the scheme is https).
    def self.dispatch(uri, request)
      request['user-agent'] = USER_AGENT
      yield(request) if block_given?

      Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') {|http| http.request(request)}
    end

    private_class_method :dispatch
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace of the natalia gem.
module Natalia
  # Version string of this gem release.
  VERSION = '0.1.0'
end
data/lib/natalia.rb ADDED
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'natalia/services/joysound'
4
+ require_relative 'natalia/services/utamap'
5
+ require_relative 'natalia/version'
6
+
7
+ module Natalia
8
# Read-only wrapper around the attribute hash a service's `get` returns.
# Every hash key can be read as a method (`entry.lyrics`) or with `[]`.
class Entry
  # @param entry [Hash] attribute hash keyed by Symbol
  def initialize(entry)
    @entry = entry
  end

  # Hash-style read access, matching the accessor Song provides.
  #
  # @param key [Symbol] attribute name
  # @return [Object, nil] the stored value, or nil when the key is absent
  def [](key)
    @entry[key]
  end

  # Reports the wrapped hash's keys as methods; anything else is left to the
  # default implementation.
  def respond_to_missing?(name, include_private = false)
    @entry.key?(name) || super
  end

  # Reads the attribute named after the called method. Unknown names yield nil
  # rather than raising NoMethodError; arguments are ignored.
  def method_missing(name, *_args)
    @entry[name]
  end
end
21
+
22
# A search hit. Wraps the attribute hash returned by a service's `search`
# and can fetch the full record (including lyrics) from the same service.
class Song
  # @param entry [Hash] attribute hash; must contain :source (an object
  #   responding to `get(id)`) and :id for {#get} to work
  def initialize(entry)
    @entry = entry
  end

  # @param key [Symbol] attribute name
  # @return [Object, nil] the stored value, or nil when the key is absent
  def [](key)
    @entry[key]
  end

  # Fetches the full record from the service this hit came from.
  #
  # @return [Entry, nil] the wrapped record, or nil when the service reports
  #   that the song does not exist (returns nil from its `get`)
  def get
    details = @entry[:source].get(@entry[:id])
    # Wrapping nil in an Entry would only defer the failure to the first
    # attribute read, so pass "not found" through as nil.
    details && Entry.new(details)
  end

  # Reports the wrapped hash's keys as methods; anything else is left to the
  # default implementation.
  def respond_to_missing?(name, include_private = false)
    @entry.key?(name) || super
  end

  # Reads the attribute named after the called method. Unknown names yield nil
  # rather than raising NoMethodError; arguments are ignored.
  def method_missing(name, *_args)
    @entry[name]
  end
end
43
+
44
# Lyric sources, ordered from most to least trustworthy.
SERVICES = [Natalia::Utamap, Natalia::Joysound].freeze
49
+
50
# Runs a title search against every service in SERVICES, in order, and
# returns all hits wrapped as Song objects.
#
# @param keyword [String] title keyword passed to each service
# @param sort [Symbol] sort key passed through to each service's `search`
# @return [Array<Song>] hits of the first service, then the second, and so on
def self.search_by_title(keyword, sort: :popularity_desc)
  hits = SERVICES.reduce([]) do |found, service|
    found + service.search(keyword, type: :title, sort: sort)
  end
  hits.map {|hit| Song.new(hit)}
end
57
+ end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: natalia
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ishotihadus
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-05-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description: Lyrics scraper for Japanese songs
28
+ email:
29
+ - 5352175+Ishotihadus@users.noreply.github.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - ".rubocop.yml"
35
+ - Gemfile
36
+ - LICENSE.txt
37
+ - README.md
38
+ - Rakefile
39
+ - lib/natalia.rb
40
+ - lib/natalia/services/joysound.rb
41
+ - lib/natalia/services/utamap.rb
42
+ - lib/natalia/utils.rb
43
+ - lib/natalia/version.rb
44
+ homepage: https://github.com/natalia
45
+ licenses:
46
+ - MIT
47
+ metadata:
48
+ homepage_uri: https://github.com/natalia
49
+ source_code_uri: https://github.com/natalia
50
+ post_install_message:
51
+ rdoc_options: []
52
+ require_paths:
53
+ - lib
54
+ required_ruby_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: 2.6.0
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ requirements: []
65
+ rubygems_version: 3.4.12
66
+ signing_key:
67
+ specification_version: 4
68
+ summary: Lyrics scraper for Japanese songs
69
+ test_files: []