sakamichi_scraper 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 47b8be892d241fb3f745274fd2dad85bf15ca93216ab19bab45ba77a6f962bef
4
- data.tar.gz: d18194978ae187c6beee80ce3cd7b18452f096a2d7bad7db767316fdbbc27baf
3
+ metadata.gz: 11a74e79888199fbe85816294b9ef042e1373cffbee3a43ad8d05bcf873e42e6
4
+ data.tar.gz: c33c5dcc257be160498d32a58c47aca1051f85d7c1ace6ea98f010658bada8dd
5
5
  SHA512:
6
- metadata.gz: 671f27b24edb3e8a1ce6135201c3d25d3bde9e38b83cfed26223b2626f0491f0f1e4b9a6b59e62cdfd7886eb4aa79c16ee01cd1f927dde4c20f27823e3b4784f
7
- data.tar.gz: 248730c77f4c4f104829cee1878a7b9a5cd6acd13058c1af25afa408ee2e476363730bd11329a20c2d92e4f05bd5569b3a93e004c5656fe167fc452aa3c511d2
6
+ metadata.gz: abd5b9724b17161fc77d9276896872cf6141e2655bf57485a352acf0c4b7fa9a0c999010c478a76b2f5ba01a2b49db0369472b40271183c1fb7b5def7a89a78a
7
+ data.tar.gz: 200345d5e406e879ee66df8689bd852cc134d97a107a4e08dc56bbb7a5f1916ecf6493f91bd28a15192d161311750a3807dcdc77a02dc5bbbca61a5d641bbf34
@@ -17,17 +17,20 @@ jobs:
17
17
  test:
18
18
 
19
19
  runs-on: ubuntu-latest
20
+ strategy:
21
+ matrix:
22
+ ruby-version: ['2.6', '2.7']
20
23
 
21
24
  steps:
22
- - uses: actions/checkout@v2
23
- - name: Set up Ruby
24
- # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
25
- # change this to (see https://github.com/ruby/setup-ruby#versioning):
26
- # uses: ruby/setup-ruby@v1
27
- uses: ruby/setup-ruby@ec106b438a1ff6ff109590de34ddc62c540232e0
28
- with:
29
- ruby-version: 2.7
30
- - name: Install dependencies
31
- run: bundle install
32
- - name: Run tests
33
- run: bundle exec rspec spec/*
25
+ - uses: actions/checkout@v2
26
+ - name: Set up Ruby
27
+ # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
28
+ # change this to (see https://github.com/ruby/setup-ruby#versioning):
29
+ # uses: ruby/setup-ruby@v1
30
+ uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
31
+ with:
32
+ ruby-version: ${{ matrix.ruby-version }}
33
+ bundler: 2.1.4
34
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
35
+ - name: Run tests
36
+ run: bundle exec rspec spec/*
data/.gitignore CHANGED
@@ -7,6 +7,7 @@
7
7
  /spec/reports/
8
8
  /tmp/
9
9
  /vendor/
10
+ /img/
10
11
 
11
12
  # ignore files needed only for development
12
13
  .ruby-version
@@ -17,3 +18,5 @@ Gemfile.lock
17
18
 
18
19
  # ignore generated files from IDE
19
20
  .idea/
21
+ sakamichi_scraper.iml
22
+ .rubocop_todo.yml
data/.rubocop.yml ADDED
@@ -0,0 +1,46 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
3
+ # Hashの key/value の記述方法についてkey: value 形式を有効とする
4
+ Style/HashSyntax:
5
+ EnforcedStyle: ruby19
6
+
7
+ # 変数展開やエスケープの必要がない場合はダブルクォートを利用する
8
+ StringLiterals:
9
+ EnforcedStyle: double_quotes
10
+
11
+ # パーセント記法の記号について とりあえずすべて()
12
+ PercentLiteralDelimiters:
13
+ PreferredDelimiters:
14
+ '%': ()
15
+ '%i': ()
16
+ '%q': ()
17
+ '%Q': ()
18
+ '%r': ()
19
+ '%s': ()
20
+ '%w': ()
21
+ '%W': ()
22
+ '%x': ()
23
+
24
+ # 式展開中でもダブルクォートを使う
25
+ Style/StringLiteralsInInterpolation:
26
+ EnforcedStyle: double_quotes
27
+
28
+ # 日本語コメントを許容する
29
+ Style/AsciiComments:
30
+ Enabled: false
31
+
32
+ # 鬱陶しいのでfrozen_string_literalは書かなくてもいいことにする
33
+ Style/FrozenStringLiteralComment:
34
+ Enabled: false
35
+
36
+ # required_ruby_versionはまだ指定しない
37
+ Gemspec/RequiredRubyVersion:
38
+ Enabled: false
39
+
40
+ # initialize classにsuperを強制しない
41
+ Lint/MissingSuper:
42
+ Enabled: false
43
+
44
+ #メソッド行上限を20行にする
45
+ Metrics/MethodLength:
46
+ Max: 20
data/Gemfile CHANGED
@@ -1,6 +1,6 @@
1
1
  source "https://rubygems.org"
2
2
 
3
- git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
3
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
4
4
 
5
5
  # Specify your gem's dependencies in sakamichi_scraper.gemspec
6
6
  gemspec
data/README.md CHANGED
@@ -1,8 +1,6 @@
1
1
  # SakamichiScraper
2
-
3
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/sakamichi_scraper`. To experiment with that code, run `bin/console` for an interactive prompt.
4
-
5
- TODO: Delete this and the text above, and describe your gem
2
+ ## About
3
+ This gem scrapes information from the blogs of the Sakamichi groups (Hinatazaka46 and Sakurazaka46).
6
4
 
7
5
  ## Installation
8
6
 
@@ -25,25 +23,43 @@ Or install it yourself as:
25
23
  ### get title of blog
26
24
 
27
25
  #### Hinatazaka46
26
+
28
27
  ```ruby
29
28
  hinata = SakamichiScraper::Hinatazaka.new
30
29
 
31
30
  # get newest title of article
32
- hinata.get_newest_blog_title
31
+ hinata.newest_blog_title
33
32
 
34
33
  # get recent title, member name, timestamp of article
35
- hinata.get_recent_blog_info
34
+ hinata.recent_blog_info
36
35
  ```
37
36
 
38
37
  #### Sakurazaka46
38
+
39
39
  ```ruby
40
40
  sakura = SakamichiScraper::Sakurazaka.new
41
41
 
42
42
  # get newest title of article
43
- sakura.get_newest_blog_title
43
+ sakura.newest_blog_title
44
44
 
45
45
  # get recent title, member name, timestamp of article
46
- sakura.get_recent_blog_info
46
+ sakura.recent_blog_info
47
+ ```
48
+
49
+ ### get images
50
+ NOTE: Any images obtained by executing the following methods are stored in the folder `img/<group name>/<today (yyyymmdd)>` (for example `img/hinatazaka/20210405`), relative to the current working directory.
51
+ #### Hinatazaka46
52
+
53
+ ```ruby
54
+ # get images from newest article
55
+ hinata.picture_in_newest_article
56
+ ```
57
+
58
+ #### Sakurazaka46
59
+
60
+ ```ruby
61
+ # get images from newest article
62
+ sakura.picture_in_newest_article
47
63
  ```
48
64
 
49
65
  ## Development
data/config/url.yml CHANGED
@@ -4,4 +4,7 @@ hinatazaka:
4
4
  sakurazaka:
5
5
  blog_top_page: "https://sakurazaka46.com/s/s46/diary/blog"
6
6
  blog_list_page: "https://sakurazaka46.com/s/s46/diary/blog/list"
7
+ exclude_img_path:
8
+ - /files/14/s46/img/com-logo_pc.svg
9
+ - /files/14/s46/img/jasrac.jpg
7
10
  keyakizaka:
@@ -1,14 +1,27 @@
1
+ require "fileutils"
2
+
1
3
  module SakamichiScraper
2
4
  class Base
3
- def init_url(group_name, yml_key)
5
+ def initialize(group_name)
6
+ @group_name = group_name
7
+ @home_page = "https://#{@group_name}46.com"
8
+ end
9
+
10
+ def init_url_from_yml(group_name, yml_key)
4
11
  url = YAML.load_file("config/url.yml")["#{group_name}"]["#{yml_key}"]
5
12
  get_content(url)
6
13
  end
7
14
 
8
15
  def get_content(url)
9
- URI.open(url, "User-Agent" => "Chrome/86.0.4240.80") do |f|
10
- f.read
11
- end
16
+ URI.open(url, "User-Agent" => "Ruby/2.7.1", &:read)
17
+ end
18
+
19
+ def blog_top_page
20
+ init_url_from_yml(@group_name, "blog_top_page")
21
+ end
22
+
23
+ def blog_list_page
24
+ init_url_from_yml(@group_name, "blog_list_page")
12
25
  end
13
26
 
14
27
  def format_content(content)
@@ -16,7 +29,48 @@ module SakamichiScraper
16
29
  end
17
30
 
18
31
  def format_timestamp(datetime)
19
- DateTime.parse(datetime).strftime('%Y-%-m-%-d %-H:%-M')
32
+ DateTime.parse(datetime).strftime("%Y-%-m-%-d %-H:%-M")
33
+ end
34
+
35
+ def exec_date
36
+ @exec_date ||= Time.now.strftime("%Y%m%d")
37
+ end
38
+
39
+ def mkdir_today_file_path
40
+ FileUtils.mkdir_p(image_file_path)
41
+ end
42
+
43
+ def exclude_img_path(group_name)
44
+ YAML.load_file("config/url.yml")["#{group_name}"]["exclude_img_path"]
45
+ end
46
+
47
+ def image_file_path
48
+ "img/#{@group_name}/#{exec_date}"
49
+ end
50
+
51
+ def image_urls_from_article_url(article_html, class_name)
52
+ [].tap do |url|
53
+ Nokogiri.parse(article_html, nil, nil).css("#{class_name} img").each do |c|
54
+ image_url = c.attribute("src").value
55
+ url << case @group_name
56
+ when "sakurazaka"
57
+ "#{@home_page}#{image_url}"
58
+ else
59
+ image_url
60
+ end
61
+ end
62
+ end
63
+ end
64
+
65
+ def download_images_from_url_list(image_urls)
66
+ image_urls.each do |image_url|
67
+ dest_image_path = "#{image_file_path}/#{image_url[%r([^/]+$)]}"
68
+ File.open(dest_image_path, "w") do |pass|
69
+ URI.parse(image_url).open do |img|
70
+ pass.write(img.read)
71
+ end
72
+ end
73
+ end
20
74
  end
21
75
  end
22
76
  end
@@ -1,24 +1,25 @@
1
- require "sakamichi_scraper/base"
1
+ require_relative "base"
2
2
 
3
3
  module SakamichiScraper
4
4
  class Hinatazaka < Base
5
- def get_blog_top_page_title
6
- html = get_blog_top_page
7
- Nokogiri::HTML.parse(html, nil, nil).title
5
+ def initialize
6
+ super("hinatazaka")
8
7
  end
9
8
 
10
- def get_newest_blog_title
11
- html = get_blog_top_page
12
- scraped_title = Nokogiri.parse(html, nil, nil)
9
+ def blog_top_page_title
10
+ Nokogiri::HTML.parse(blog_top_page, nil, nil).title
11
+ end
12
+
13
+ def newest_blog_title
14
+ scraped_title = Nokogiri.parse(blog_top_page, nil, nil)
13
15
  .at_css(".p-blog-main__head > .c-blog-main__title")
14
16
  .content
15
17
  format_content(scraped_title)
16
18
  end
17
19
 
18
- def get_recent_blog_info
20
+ def recent_blog_info
19
21
  res = []
20
- html = get_blog_top_page
21
- Nokogiri.parse(html, nil, nil).css(".p-blog-top__list > li").each do |c|
22
+ Nokogiri.parse(blog_top_page, nil, nil).css(".p-blog-top__list > li").each do |c|
22
23
  info_arr = c.content.strip.split("\n").reject { |i| i.blank? }
23
24
  info = {
24
25
  member: info_arr[0],
@@ -29,10 +30,27 @@ module SakamichiScraper
29
30
  end
30
31
  end
31
32
 
33
+ def picture_in_newest_article
34
+ newest_article_url = article_urls_from_list_page(blog_top_page).first
35
+ article_html = get_content(newest_article_url)
36
+ image_urls = image_urls_from_article_url(article_html, "div.c-blog-article__text")
37
+
38
+ mkdir_today_file_path unless Dir.exist?(image_file_path)
39
+ download_images_from_url_list(image_urls)
40
+ end
41
+
32
42
  private
33
43
 
34
- def get_blog_top_page
35
- init_url("hinatazaka", "blog_top_page")
44
+ def article_urls_from_list_page(html)
45
+ [].tap do |array|
46
+ Nokogiri.parse(html, nil, nil).css(".p-blog-top__list > li").each do |c|
47
+ array << "#{@home_page}#{c.css("a")[0][:href]}".match(/(.*)\?.*$/)[1]
48
+ end
49
+ end
50
+ end
51
+
52
+ def blog_top_page
53
+ init_url_from_yml(@group_name, "blog_top_page")
36
54
  end
37
55
  end
38
56
  end
@@ -1,21 +1,22 @@
1
- require "sakamichi_scraper/base"
1
+ require_relative "base"
2
2
 
3
3
  module SakamichiScraper
4
4
  class Sakurazaka < Base
5
- def get_blog_top_page_title
6
- html = get_blog_top_page
7
- Nokogiri::HTML.parse(html, nil, nil).title
5
+ def initialize
6
+ super("sakurazaka")
8
7
  end
9
8
 
10
- def get_newest_blog_title
11
- html = get_blog_list_page
12
- Nokogiri.parse(html, nil, nil).css(".inner.title-wrap > .title").first.children.to_s
9
+ def blog_top_page_title
10
+ Nokogiri::HTML.parse(blog_top_page, nil, nil).title
13
11
  end
14
12
 
15
- def get_recent_blog_info
13
+ def newest_blog_title
14
+ Nokogiri.parse(blog_list_page, nil, nil).at_css(".date-title > .title").children.to_s
15
+ end
16
+
17
+ def recent_blog_info
16
18
  res = []
17
- html = get_blog_top_page
18
- Nokogiri.parse(html, nil, nil).css(".com-blog-part.box4.fxpc > li").each do |c|
19
+ Nokogiri.parse(blog_list_page, nil, nil).css(".com-blog-part.box4.fxpc > li").each do |c|
19
20
  info = {
20
21
  member: c.css(".prof-in.fx > .name").children.to_s,
21
22
  title: c.css(".date-title > .title").children.to_s,
@@ -25,14 +26,23 @@ module SakamichiScraper
25
26
  end
26
27
  end
27
28
 
28
- private
29
+ def picture_in_newest_article
30
+ newest_article_url = article_urls_from_list_page(blog_list_page).first
31
+ article_html = get_content(newest_article_url)
32
+ image_urls = image_urls_from_article_url(article_html, "div.box-article")
29
33
 
30
- def get_blog_top_page
31
- init_url("sakurazaka", "blog_top_page")
34
+ mkdir_today_file_path unless Dir.exist?(image_file_path)
35
+ download_images_from_url_list(image_urls)
32
36
  end
33
37
 
34
- def get_blog_list_page
35
- init_url("sakurazaka", "blog_list_page")
38
+ private
39
+
40
+ def article_urls_from_list_page(html)
41
+ [].tap do |array|
42
+ Nokogiri.parse(html, nil, nil).css(".com-blog-part.box4.fxpc > li").each do |c|
43
+ array << "#{@home_page}#{c.css("a")[0][:href]}"
44
+ end
45
+ end
36
46
  end
37
47
  end
38
48
  end
@@ -1,3 +1,3 @@
1
1
  module SakamichiScraper
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -1,5 +1,4 @@
1
-
2
- lib = File.expand_path("../lib", __FILE__)
1
+ lib = File.expand_path("lib", __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require "sakamichi_scraper/version"
5
4
 
@@ -9,25 +8,25 @@ Gem::Specification.new do |spec|
9
8
  spec.authors = ["oyrarumakan"]
10
9
  spec.email = ["ryo19911030@hotmail.co.jp"]
11
10
 
12
- spec.summary = %q{ Write a short summary, because RubyGems requires one.}
13
- spec.description = %q{ Write a longer description or delete this line.}
11
+ spec.summary = "scrape gem from Sakamichi group's(Hinatazaka46, Sakurazaka46) blog"
12
+ spec.description = "This gem is used to scrape info from Sakamichi group's(Hinatazaka46, Sakurazaka46) blog."
14
13
  spec.homepage = "https://github.com/oyrarumakan/sakamichi_scraper"
15
14
  spec.license = "MIT"
16
15
 
17
16
  # Specify which files should be added to the gem when it is released.
18
17
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
19
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
18
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
20
19
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
21
20
  end
22
21
  spec.bindir = "exe"
23
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
22
+ spec.executables = spec.files.grep(%r(^exe/)) { |f| File.basename(f) }
24
23
  spec.require_paths = ["lib"]
25
24
 
26
25
  spec.add_dependency "activesupport", "6.0.3.4"
27
26
  spec.add_dependency "nokogiri", "1.10.10"
28
27
  spec.add_development_dependency "bundler", "2.1.4"
29
- spec.add_development_dependency "rake", "~> 10.0"
30
- spec.add_development_dependency "rspec", "~> 3.0"
31
28
  spec.add_development_dependency "pry", "0.13.1"
32
-
29
+ spec.add_development_dependency "rake", "~> 12.0"
30
+ spec.add_development_dependency "rspec", "~> 3.0"
31
+ spec.add_development_dependency "rubocop", "1.11.0"
33
32
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sakamichi_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - oyrarumakan
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-11-10 00:00:00.000000000 Z
11
+ date: 2021-04-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -52,20 +52,34 @@ dependencies:
52
52
  - - '='
53
53
  - !ruby/object:Gem::Version
54
54
  version: 2.1.4
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '='
60
+ - !ruby/object:Gem::Version
61
+ version: 0.13.1
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '='
67
+ - !ruby/object:Gem::Version
68
+ version: 0.13.1
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: rake
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: '10.0'
75
+ version: '12.0'
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: '10.0'
82
+ version: '12.0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rspec
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -81,20 +95,21 @@ dependencies:
81
95
  - !ruby/object:Gem::Version
82
96
  version: '3.0'
83
97
  - !ruby/object:Gem::Dependency
84
- name: pry
98
+ name: rubocop
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
101
  - - '='
88
102
  - !ruby/object:Gem::Version
89
- version: 0.13.1
103
+ version: 1.11.0
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
108
  - - '='
95
109
  - !ruby/object:Gem::Version
96
- version: 0.13.1
97
- description: " Write a longer description or delete this line."
110
+ version: 1.11.0
111
+ description: This gem is used to scrape info from Sakamichi group's(Hinatazaka46,
112
+ Sakurazaka46) blog.
98
113
  email:
99
114
  - ryo19911030@hotmail.co.jp
100
115
  executables: []
@@ -104,6 +119,7 @@ files:
104
119
  - ".github/workflows/ruby.yml"
105
120
  - ".gitignore"
106
121
  - ".rspec"
122
+ - ".rubocop.yml"
107
123
  - ".travis.yml"
108
124
  - CODE_OF_CONDUCT.md
109
125
  - Gemfile
@@ -141,5 +157,5 @@ requirements: []
141
157
  rubygems_version: 3.1.2
142
158
  signing_key:
143
159
  specification_version: 4
144
- summary: Write a short summary, because RubyGems requires one.
160
+ summary: scrape gem from Sakamichi group's(Hinatazaka46, Sakurazaka46) blog
145
161
  test_files: []