Ruby4Misawa 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +3 -3
- data/README.md +58 -0
- data/Ruby4Misawa.gemspec +13 -0
- data/lib/Ruby4Misawa.rb +33 -73
- data/lib/version.rb +3 -0
- metadata +41 -66
- data/README.rdoc +0 -24
- data/Ruby4Misawa-0.1.2.gemspec +0 -13
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3b5ec5d9fb4d7d600949d79b09af92fa0af852fa
|
4
|
+
data.tar.gz: 4e387642d93550fdf83a372cfbc141ae2d8fc968
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9fa104901863b91789f0c440a8b7ea2a5462860ab5e588887596e613a7e565a7f6863593cb2c448f3b8d5dae8e61557c27383a8b1b1b116558d5891bb1601136
|
7
|
+
data.tar.gz: 8f8104fd574b0f0a64549de8a764fd6570882c737e02a5589566d109fa6ff646df691830bdf72888362a372fd7c5f2593a8c95aeff9a5b8c55e3f81c24355f79
|
data/Gemfile
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
source
|
2
|
-
|
3
|
-
|
1
|
+
source 'http://rubygems.org'
|
2
|
+
|
3
|
+
gemspec :name => "Ruby4Misawa"
|
data/README.md
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# Ruby4Misawa
|
2
|
+
|
3
|
+
## About
|
4
|
+
|
5
|
+
A scraping library for 地獄のミサワ
|
6
|
+
|
7
|
+
This version is not compatible with older versions.
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your Gemfile:
|
12
|
+
|
13
|
+
`gem 'Ruby4Misawa'`
|
14
|
+
|
15
|
+
or
|
16
|
+
|
17
|
+
`$ gem install Ruby4Misawa`
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
### cid
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require 'Ruby4Misawa'
|
25
|
+
|
26
|
+
misawa = Misawa.new(32)
|
27
|
+
misawa.scrape ##> [{ :image => ..., :title => ..., :date => ..., :identifier => ..., :body => ... }, ...]
|
28
|
+
```
|
29
|
+
|
30
|
+
### name
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
require 'Ruby4Misawa'
|
34
|
+
|
35
|
+
misawa = Misawa.new("あつしさん")
|
36
|
+
misawa.scrape ##> [{ :image => ..., :title => ..., :date => ..., :identifier => ..., :body => ... }, ...]
|
37
|
+
```
|
38
|
+
|
39
|
+
### designating page number
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
require 'Ruby4Misawa'
|
43
|
+
|
44
|
+
misawa = Misawa.new("あつしさん", 2)
|
45
|
+
misawa.scrape ##> [{ :image => ..., :title => ..., :date => ..., :identifier => ..., :body => ... }, ...]
|
46
|
+
```
|
47
|
+
|
48
|
+
## Contributing
|
49
|
+
|
50
|
+
1. fork the project.
|
51
|
+
2. create your feature branch. (`git checkout -b my-feature`)
|
52
|
+
3. commit your changes. (`git commit -am 'commit message.'`)
|
53
|
+
4. push to the branch. (`git push origin my-feature`)
|
54
|
+
5. send pull request.
|
55
|
+
|
56
|
+
## License
|
57
|
+
|
58
|
+
MIT
|
data/Ruby4Misawa.gemspec
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require File.expand_path("../lib/version", __FILE__)
|
2
|
+
|
3
|
+
Gem::Specification.new "Ruby4Misawa", Misawa::VERSION do |spec|
|
4
|
+
spec.summary = 'A scraping library for Jigoku no Misawa'
|
5
|
+
spec.homepage = 'https://github.com/namusyaka/Ruby4Misawa'
|
6
|
+
spec.authors = ['namusyaka']
|
7
|
+
spec.email = 'namusyaka@gmail.com'
|
8
|
+
spec.files = `git ls-files`.split("\n") - %w(.gitignore)
|
9
|
+
spec.test_files = spec.files.select { |path| path =~ /^spec\/.*_spec\.rb/ }
|
10
|
+
spec.license = "MIT"
|
11
|
+
|
12
|
+
spec.add_dependency('nokogiri')
|
13
|
+
end
|
data/lib/Ruby4Misawa.rb
CHANGED
@@ -1,102 +1,62 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
open-uri
|
6
|
-
kconv
|
7
|
-
optparse
|
8
|
-
rubygems
|
9
|
-
nokogiri
|
10
|
-
).each { | name | require name }
|
1
|
+
require 'open-uri'
|
2
|
+
require 'kconv'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'nokogiri'
|
11
5
|
|
12
6
|
class Misawa
|
13
|
-
|
14
|
-
class NotFoundError < ArgumentError; end;
|
15
|
-
|
16
|
-
DOMAIN = 'http://jigokuno.com/'
|
17
|
-
|
7
|
+
DOMAIN = 'http://jigokuno.com/'
|
18
8
|
@@categories = {}
|
19
9
|
|
10
|
+
class NotFoundError < ArgumentError; end
|
11
|
+
|
20
12
|
Nokogiri.HTML(open(DOMAIN).read).css('dl').each do |dl|
|
21
13
|
if dl.children[0].inner_html.toutf8 == '惚れさせ男子達'
|
22
|
-
dl.children[2].children[1].children.to_a.delete_if{|node|
|
14
|
+
dl.children[2].children[1].children.to_a.delete_if{|node|node.class == Nokogiri::XML::Text}.each do |li|
|
23
15
|
a = li.child
|
24
16
|
@@categories[a.text.scan(/(.+?)(?:\([0-9]+\))?$/)[0][0]] = a[:href].scan(/cid=([0-9]+)/)[0][0]
|
25
17
|
end
|
26
18
|
end
|
27
19
|
end
|
28
20
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
@category = @@categories[category]
|
34
|
-
@page = page =~ /^[0-9]+?\.\.[0-9]+?$/ ? eval(page) : page
|
21
|
+
def initialize(name, page = 0)
|
22
|
+
@name = name
|
23
|
+
@cid = @name.is_a?(Integer) ? @name : name_to_cid(@name)
|
24
|
+
@page = page
|
35
25
|
end
|
36
26
|
|
37
27
|
def scrape
|
38
|
-
|
39
|
-
body = get_body(uri)
|
40
|
-
Nokogiri.HTML(body).css('img.pict').map { | img | result << img['src'] } if body
|
41
|
-
result
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
class << self;
|
28
|
+
data = []
|
46
29
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
end
|
30
|
+
begin
|
31
|
+
nokogiri = Nokogiri.HTML(open(misawa_uri).read)
|
32
|
+
rescue OpenURI::HTTPError
|
33
|
+
raise NotFoundError
|
52
34
|
end
|
53
35
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
36
|
+
# parse some attributes
|
37
|
+
nokogiri.xpath('//comment()[contains(., "rdf")]').each do |entry|
|
38
|
+
attributes = Nokogiri.XML(entry.to_s.toutf8.gsub(/^<!--|-->$/, "")).child.css('rdf|Description')[0].attributes
|
39
|
+
data << %w[title date identifier].inject({}) do |result, key|
|
40
|
+
result[key.to_sym] = attributes[key].value
|
41
|
+
result
|
58
42
|
end
|
59
43
|
end
|
60
44
|
|
61
|
-
|
62
|
-
|
63
|
-
|
45
|
+
# parse images
|
46
|
+
nokogiri.css('img.pict').to_a.each_with_index { |image, i|
|
47
|
+
data[i].merge!(:image => image['src'], :body => image['alt'])
|
48
|
+
}
|
64
49
|
|
50
|
+
data
|
65
51
|
end
|
66
52
|
|
67
|
-
|
68
|
-
open(URI.encode(uri)).read
|
69
|
-
end
|
53
|
+
private
|
70
54
|
|
71
|
-
def
|
72
|
-
|
73
|
-
base = "#{DOMAIN}/?cid=#{@category}&page="
|
74
|
-
@page.map { | i | "#{base}#{i}" }
|
75
|
-
else
|
76
|
-
["#{DOMAIN}/?cid=#{@category}&page=#{@page}"]
|
77
|
-
end
|
55
|
+
def misawa_uri
|
56
|
+
"#{DOMAIN}?cid=#{@cid}&page=#{@page}"
|
78
57
|
end
|
79
58
|
|
80
|
-
|
81
|
-
|
82
|
-
if $0 === __FILE__
|
83
|
-
|
84
|
-
OptionParser.new do | opt |
|
85
|
-
|
86
|
-
arguments = {}
|
87
|
-
|
88
|
-
opt.on('-t [TYPE]', '--type [TYPE]') do | value |
|
89
|
-
arguments[:type] = value
|
90
|
-
end
|
91
|
-
opt.on('-p [PAGE]', '--page [PAGE]') do | value |
|
92
|
-
arguments[:page] = value
|
93
|
-
end
|
94
|
-
opt.on('-c [CATEGORY]', '--category [CATEGORY]') do | value |
|
95
|
-
arguments[:category] = value
|
96
|
-
end
|
97
|
-
|
98
|
-
opt.parse!(ARGV)
|
99
|
-
Misawa.send("#{arguments[:type] || 'uri'}_misawa", arguments[:category], arguments[:page])
|
59
|
+
def name_to_cid(name)
|
60
|
+
@@categories[name]
|
100
61
|
end
|
101
|
-
|
102
62
|
end
|
data/lib/version.rb
ADDED
metadata
CHANGED
@@ -1,88 +1,63 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: Ruby4Misawa
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 2
|
10
|
-
version: 0.1.2
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.4
|
11
5
|
platform: ruby
|
12
|
-
authors:
|
6
|
+
authors:
|
13
7
|
- namusyaka
|
14
8
|
autorequire:
|
15
9
|
bindir: bin
|
16
10
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
11
|
+
date: 2013-08-08 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
21
14
|
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
22
21
|
prerelease: false
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
segments:
|
30
|
-
- 0
|
31
|
-
version: "0"
|
32
|
-
type: :development
|
33
|
-
version_requirements: *id001
|
34
|
-
description: !binary |
|
35
|
-
5Zyw542E44Gu44Of44K144Ov44OW44Ot44Kw44Gr44GC44KLVVJM44GM44Go
|
36
|
-
44KM44Gf44KK44CB44Gd44Gu44G+44G+55S75YOP44KS5L+d5a2Y44GX44Gf
|
37
|
-
44KK44Gn44GN44G+44GZ44CC
|
38
|
-
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
description:
|
39
28
|
email: namusyaka@gmail.com
|
40
29
|
executables: []
|
41
|
-
|
42
30
|
extensions: []
|
43
|
-
|
44
31
|
extra_rdoc_files: []
|
45
|
-
|
46
|
-
files:
|
47
|
-
- README.rdoc
|
48
|
-
- Ruby4Misawa-0.1.2.gemspec
|
32
|
+
files:
|
49
33
|
- Gemfile
|
34
|
+
- README.md
|
35
|
+
- Ruby4Misawa.gemspec
|
50
36
|
- lib/Ruby4Misawa.rb
|
37
|
+
- lib/version.rb
|
51
38
|
homepage: https://github.com/namusyaka/Ruby4Misawa
|
52
|
-
licenses:
|
53
|
-
|
39
|
+
licenses:
|
40
|
+
- MIT
|
41
|
+
metadata: {}
|
54
42
|
post_install_message:
|
55
43
|
rdoc_options: []
|
56
|
-
|
57
|
-
require_paths:
|
44
|
+
require_paths:
|
58
45
|
- lib
|
59
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
none: false
|
70
|
-
requirements:
|
71
|
-
- - ">="
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
hash: 3
|
74
|
-
segments:
|
75
|
-
- 0
|
76
|
-
version: "0"
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - '>='
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
77
56
|
requirements: []
|
78
|
-
|
79
57
|
rubyforge_project:
|
80
|
-
rubygems_version:
|
58
|
+
rubygems_version: 2.0.2
|
81
59
|
signing_key:
|
82
|
-
specification_version:
|
83
|
-
summary:
|
84
|
-
44K544Kv44Os44Kk44OU44Oz44Kw44Op44Kk44OW44Op44OqIGZvciDlnLDn
|
85
|
-
jYTjga7jg5/jgrXjg68=
|
86
|
-
|
60
|
+
specification_version: 4
|
61
|
+
summary: A scraping library for Jigoku no Misawa
|
87
62
|
test_files: []
|
88
|
-
|
63
|
+
has_rdoc:
|
data/README.rdoc
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
= Ruby4Misawa
|
2
|
-
|
3
|
-
== これはなにか
|
4
|
-
|
5
|
-
- 地獄のミサワの画像を取ってくるやつ
|
6
|
-
- 画像はURLで出力するか、保存するかを選ぶ
|
7
|
-
|
8
|
-
== インストール
|
9
|
-
|
10
|
-
gem install Ruby4Misawa でおk
|
11
|
-
|
12
|
-
== つかいかた
|
13
|
-
|
14
|
-
libにあるRuby4Misawa.rbを適当に移動して、
|
15
|
-
ruby Ruby4Misawa.rb [オプション] [値]
|
16
|
-
|
17
|
-
[-t [uri, save]] uriは画像のURIを出力、saveは保存しまくる。permission関係でこける可能性はありけり。
|
18
|
-
[-c [カテゴリ名]] ルシフェル や バイトリーダーのように指定。一つしか指定できないので注意。
|
19
|
-
[-p [対象ページ]] 1 や 0のような数値か、0..5のようにRange形式で指定
|
20
|
-
|
21
|
-
--type, --category, --pageでも可
|
22
|
-
|
23
|
-
ちなみに何も指定しないと地獄のミサワのトップページにあるミサワ画像(と思われるもの)を取ってくる
|
24
|
-
|
data/Ruby4Misawa-0.1.2.gemspec
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
# -*- encoding: UTF-8 -*-
|
2
|
-
|
3
|
-
Gem::Specification.new do | spec |
|
4
|
-
spec.name = 'Ruby4Misawa'
|
5
|
-
spec.version = '0.1.2'
|
6
|
-
spec.summary = 'スクレイピングライブラリ for 地獄のミサワ'
|
7
|
-
spec.description = '地獄のミサワブログにあるURLがとれたり、そのまま画像を保存したりできます。'
|
8
|
-
spec.homepage = 'https://github.com/namusyaka/Ruby4Misawa'
|
9
|
-
spec.author = 'namusyaka'
|
10
|
-
spec.email = 'namusyaka@gmail.com'
|
11
|
-
spec.files = %w( README.rdoc Ruby4Misawa-0.1.2.gemspec Gemfile lib lib/Ruby4Misawa.rb )
|
12
|
-
spec.add_development_dependency('nokogiri')
|
13
|
-
end
|