ncode-syosetu-core 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: d633f10a2926b8b9ac37e4c5b41be5755ca487b3
4
- data.tar.gz: ccf4f465dc7dcfa57aac78c05e282589e464190f
2
+ SHA256:
3
+ metadata.gz: ced6daefa3566dd623f1b39e96f8d8dc863517d5acb49a0d0c59888162af4788
4
+ data.tar.gz: 46a2c39df0910ac0feed9f24bb681ee4a8a85d192eb786f5a7d95238d1f16cbc
5
5
  SHA512:
6
- metadata.gz: 6c5cb168e34e4d49a919ec341f6ce9328f6ff580b713fafd8ee7380f88306a558cb77fff6b88d7f7c0c36c1d19c13b2322cd2379a18ef2ffce528969786c391c
7
- data.tar.gz: 7320f221b49eadf945b893986c6d56cd4204bfb826e673c5fecfd0bd2b6319c3cefcfeab0f8a070c284ebaa5b55e561bfdb6751b065ae05d892755eca7d86f36
6
+ metadata.gz: 2bfc117c976f50af6b269786bf3caf42f7e207d1b6a0d94f87333d4f533c6e88bcbf917b6017d60b5ce312ffeecc7ce986874fb8062a5a0712fff19550c9d5df
7
+ data.tar.gz: 6f99a37a4d659c7c341679bf8e29de50d29caf8c5a88e730320f1e1da2a64772d2f72c1cee59bc82b4c1e925debf0028a23bcb54862715838786dbecedd67967
data/exe/ncode2md ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
4
+ require 'ncode-syosetu-core'
5
+
6
+ if ARGV.size == 0
7
+ puts "#$0 ncode [ncode...]"
8
+ exit
9
+ end
10
+
11
+ ARGV.each do |arg|
12
+ parsed = NcodeSyosetu.parse_url(arg)
13
+ novel = NcodeSyosetu.client(host: parsed[:host]).get(parsed[:ncode])
14
+ File.write("#{parsed[:ncode]}.md", novel.markdown)
15
+ end
data/exe/ncode2text ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
4
+ require 'ncode-syosetu-core'
5
+
6
+ if ARGV.size == 0
7
+ puts "#$0 ncode [ncode...]"
8
+ exit
9
+ end
10
+
11
+ ARGV.each do |arg|
12
+ parsed = NcodeSyosetu.parse_url(arg)
13
+ novel = NcodeSyosetu.client(host: parsed[:host]).get(parsed[:ncode])
14
+ File.write("#{parsed[:ncode]}.txt", novel.text)
15
+ end
@@ -12,10 +12,16 @@ module NcodeSyosetu
12
12
  end
13
13
  end
14
14
 
15
- def initialize(logger: Logger.new(STDOUT), sleep: 0.5)
15
+ def initialize(host: NcodeSyosetu::NCODE_HOST_NAME, logger: Logger.new(STDOUT), sleep: 0.5)
16
+ @host = host
16
17
  @mechanize = Mechanize.new
17
18
  @logger = logger
18
19
  @sleep = sleep
20
+
21
+ if @host == NcodeSyosetu::NOVEL18_HOST_NAME
22
+ cookie = Mechanize::Cookie.new("over18", "yes", domain: ".syosetu.com", path: "/", expires: Time.now + 86400 * 365)
23
+ @mechanize.cookie_jar.add(cookie)
24
+ end
19
25
  end
20
26
 
21
27
  def get(ncode)
@@ -43,11 +49,11 @@ module NcodeSyosetu
43
49
  end
44
50
 
45
51
  def toc_url(ncode)
46
- "http://#{NcodeSyosetu::NCODE_HOST_NAME}/#{ncode}"
52
+ "https://#{@host}/#{ncode}"
47
53
  end
48
54
 
49
55
  def episode_url(ncode, number)
50
- "http://#{NcodeSyosetu::NCODE_HOST_NAME}/#{ncode}/#{number}"
56
+ "https://#{@host}/#{ncode}/#{number}"
51
57
  end
52
58
 
53
59
  private
@@ -1,4 +1,5 @@
1
1
  require "erb"
2
+ require "nokogiri"
2
3
 
3
4
  module NcodeSyosetu
4
5
  module Model
@@ -11,8 +12,8 @@ module NcodeSyosetu
11
12
  @number = number
12
13
 
13
14
  @body_html =
14
- page.search(".novel_subtitle").to_html <<
15
- page.search(".novel_view").to_html
15
+ page.search(".p-novel__title").to_html <<
16
+ page.search(".p-novel__body").to_html
16
17
  end
17
18
 
18
19
  def html
@@ -32,6 +33,51 @@ module NcodeSyosetu
32
33
  </html>
33
34
  HTML
34
35
  end
36
+
37
+ def text
38
+ doc = Nokogiri::HTML.fragment(@body_html)
39
+ lines = []
40
+ lines << doc.at(".p-novel__title")&.text&.strip
41
+ doc.search(".p-novel__text p").each do |p|
42
+ lines << p.text
43
+ end
44
+ lines.compact.join("\n")
45
+ end
46
+
47
+ def markdown
48
+ doc = Nokogiri::HTML.fragment(@body_html)
49
+ lines = []
50
+ title_text = doc.at(".p-novel__title")&.text&.strip
51
+ lines << "## #{title_text}" if title_text
52
+ lines << ""
53
+ doc.search(".p-novel__text p").each do |p|
54
+ lines << ruby_to_markdown(p)
55
+ end
56
+ lines.join("\n")
57
+ end
58
+
59
+ private
60
+
61
+ def ruby_to_markdown(node)
62
+ result = +""
63
+ node.children.each do |child|
64
+ case child.name
65
+ when "ruby"
66
+ base = child.search("rb").first&.text || child.children.select(&:text?).map(&:text).join
67
+ rt = child.at("rt")&.text
68
+ if rt && !rt.empty?
69
+ result << "#{base}(#{rt})"
70
+ else
71
+ result << base
72
+ end
73
+ when "text"
74
+ result << child.text
75
+ else
76
+ result << child.text
77
+ end
78
+ end
79
+ result
80
+ end
35
81
  end
36
82
  end
37
83
  end
@@ -6,6 +6,14 @@ module NcodeSyosetu
6
6
  def initialize(title)
7
7
  @title = title
8
8
  end
9
+
10
+ def text
11
+ title.strip
12
+ end
13
+
14
+ def markdown
15
+ "# #{title.strip}"
16
+ end
9
17
  end
10
18
  end
11
19
  end
@@ -15,6 +15,31 @@ module NcodeSyosetu
15
15
  end
16
16
  EOS
17
17
  end
18
+
19
+ def text
20
+ lines = []
21
+ lines << title
22
+ lines << author
23
+ lines << ""
24
+ episodes.each do |episode|
25
+ lines << episode.text
26
+ lines << ""
27
+ end
28
+ lines.join("\n")
29
+ end
30
+
31
+ def markdown
32
+ lines = []
33
+ lines << "# #{title}"
34
+ lines << ""
35
+ lines << "*#{author}*"
36
+ lines << ""
37
+ episodes.each do |episode|
38
+ lines << episode.markdown
39
+ lines << ""
40
+ end
41
+ lines.join("\n")
42
+ end
18
43
  end
19
44
  end
20
45
  end
@@ -6,12 +6,12 @@ module NcodeSyosetu
6
6
  def initialize(page)
7
7
  @url = page.uri.to_s
8
8
  @title = page.title
9
- @author = page.search(".novel_writername").text.chomp
10
- @abstract = page.search(".novel_ex").text.chomp
9
+ @author = page.search(".p-novel__author").text.chomp
10
+ @abstract = page.search(".p-novel__summary").text.chomp
11
11
 
12
12
  @episodes = []
13
- page.at(".index_box").children.each do |sub_item|
14
- next unless sub_item.matches?('.chapter_title, .novel_sublist2')
13
+ page.at(".p-eplist").children.each do |sub_item|
14
+ next unless sub_item.matches?('.p-eplist__chapter-title, .p-eplist__sublist')
15
15
  episode = { text: sub_item.text.gsub(/\s+/, " ").chomp }
16
16
  link = sub_item.search("a")
17
17
  unless link.empty?
@@ -23,9 +23,9 @@ module NcodeSyosetu
23
23
  end
24
24
 
25
25
  @body_html =
26
- page.search(".novel_writername").to_html <<
27
- page.search(".novel_ex").to_html <<
28
- page.search(".index_box").to_html
26
+ page.search(".p-novel__author").to_html <<
27
+ page.search(".p-novel__summary").to_html <<
28
+ page.search(".p-eplist").to_html
29
29
  end
30
30
 
31
31
  def html
@@ -1,3 +1,3 @@
1
1
  module NcodeSyosetu
2
- VERSION = "0.2.3"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/ncode_syosetu.rb CHANGED
@@ -5,8 +5,18 @@ require "ncode_syosetu/builder"
5
5
 
6
6
  module NcodeSyosetu
7
7
  NCODE_HOST_NAME = "ncode.syosetu.com"
8
+ NOVEL18_HOST_NAME = "novel18.syosetu.com"
8
9
 
9
- def self.client
10
- @@client ||= Client.new
10
+ def self.client(host: NCODE_HOST_NAME)
11
+ @@clients ||= {}
12
+ @@clients[host] ||= Client.new(host: host)
13
+ end
14
+
15
+ def self.parse_url(url_or_ncode)
16
+ if url_or_ncode =~ %r{\Ahttps?://([^/]+)/([^/]+)}
17
+ { host: $1, ncode: $2 }
18
+ else
19
+ { host: NCODE_HOST_NAME, ncode: url_or_ncode }
20
+ end
11
21
  end
12
22
  end
@@ -18,9 +18,8 @@ Gem::Specification.new do |spec|
18
18
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_dependency "mechanize", "2.7.2"
21
+ spec.add_dependency "mechanize", "~> 2.14"
22
22
 
23
- spec.add_development_dependency "bundler", "~> 1.13"
24
- spec.add_development_dependency "rake", "~> 10.0"
25
- spec.add_development_dependency "rspec", "~> 3.0"
23
+ spec.add_development_dependency "rake"
24
+ spec.add_development_dependency "rspec", "~> 3.13"
26
25
  end
metadata CHANGED
@@ -1,75 +1,63 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ncode-syosetu-core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - hogelog
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-01-14 00:00:00.000000000 Z
11
+ date: 2026-03-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - '='
18
- - !ruby/object:Gem::Version
19
- version: 2.7.2
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - '='
25
- - !ruby/object:Gem::Version
26
- version: 2.7.2
27
- - !ruby/object:Gem::Dependency
28
- name: bundler
29
15
  requirement: !ruby/object:Gem::Requirement
30
16
  requirements:
31
17
  - - "~>"
32
18
  - !ruby/object:Gem::Version
33
- version: '1.13'
34
- type: :development
19
+ version: '2.14'
20
+ type: :runtime
35
21
  prerelease: false
36
22
  version_requirements: !ruby/object:Gem::Requirement
37
23
  requirements:
38
24
  - - "~>"
39
25
  - !ruby/object:Gem::Version
40
- version: '1.13'
26
+ version: '2.14'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: rake
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
- - - "~>"
31
+ - - ">="
46
32
  - !ruby/object:Gem::Version
47
- version: '10.0'
33
+ version: '0'
48
34
  type: :development
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
52
- - - "~>"
38
+ - - ">="
53
39
  - !ruby/object:Gem::Version
54
- version: '10.0'
40
+ version: '0'
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: rspec
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
45
  - - "~>"
60
46
  - !ruby/object:Gem::Version
61
- version: '3.0'
47
+ version: '3.13'
62
48
  type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
52
  - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: '3.0'
69
- description:
54
+ version: '3.13'
55
+ description:
70
56
  email:
71
57
  - konbu.komuro@gmail.com
72
- executables: []
58
+ executables:
59
+ - ncode2md
60
+ - ncode2text
73
61
  extensions: []
74
62
  extra_rdoc_files: []
75
63
  files:
@@ -80,6 +68,8 @@ files:
80
68
  - Rakefile
81
69
  - bin/console
82
70
  - bin/setup
71
+ - exe/ncode2md
72
+ - exe/ncode2text
83
73
  - lib/ncode-syosetu-core.rb
84
74
  - lib/ncode_syosetu.rb
85
75
  - lib/ncode_syosetu/builder.rb
@@ -95,7 +85,7 @@ homepage: https://github.com/hogelog/ncode-syosetu
95
85
  licenses:
96
86
  - MIT
97
87
  metadata: {}
98
- post_install_message:
88
+ post_install_message:
99
89
  rdoc_options: []
100
90
  require_paths:
101
91
  - lib
@@ -110,9 +100,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
110
100
  - !ruby/object:Gem::Version
111
101
  version: '0'
112
102
  requirements: []
113
- rubyforge_project:
114
- rubygems_version: 2.6.8
115
- signing_key:
103
+ rubygems_version: 3.5.22
104
+ signing_key:
116
105
  specification_version: 4
117
106
  summary: Ncode syosetu scraper
118
107
  test_files: []