Ruby4Misawa 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +3 -3
- data/README.md +58 -0
- data/Ruby4Misawa.gemspec +13 -0
- data/lib/Ruby4Misawa.rb +33 -73
- data/lib/version.rb +3 -0
- metadata +41 -66
- data/README.rdoc +0 -24
- data/Ruby4Misawa-0.1.2.gemspec +0 -13
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3b5ec5d9fb4d7d600949d79b09af92fa0af852fa
|
4
|
+
data.tar.gz: 4e387642d93550fdf83a372cfbc141ae2d8fc968
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9fa104901863b91789f0c440a8b7ea2a5462860ab5e588887596e613a7e565a7f6863593cb2c448f3b8d5dae8e61557c27383a8b1b1b116558d5891bb1601136
|
7
|
+
data.tar.gz: 8f8104fd574b0f0a64549de8a764fd6570882c737e02a5589566d109fa6ff646df691830bdf72888362a372fd7c5f2593a8c95aeff9a5b8c55e3f81c24355f79
|
data/Gemfile
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
source
|
2
|
-
|
3
|
-
|
1
|
+
source 'http://rubygems.org'
|
2
|
+
|
3
|
+
gemspec :name => "Ruby4Misawa"
|
data/README.md
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# Ruby4Misawa
|
2
|
+
|
3
|
+
## About
|
4
|
+
|
5
|
+
A scraping library for 地獄のミサワ
|
6
|
+
|
7
|
+
This version is not compatible with older versions.
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your Gemfile.
|
12
|
+
|
13
|
+
`gem 'Ruby4Misawa'`
|
14
|
+
|
15
|
+
or
|
16
|
+
|
17
|
+
`$ gem install Ruby4Misawa`
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
### cid
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require 'Ruby4Misawa'
|
25
|
+
|
26
|
+
misawa = Misawa.new(32)
|
27
|
+
misawa.scrape ##> [{ :image => ..., :title => ..., :date => ..., :identifier => ..., :body => ... }, ...]
|
28
|
+
```
|
29
|
+
|
30
|
+
### name
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
require 'Ruby4Misawa'
|
34
|
+
|
35
|
+
misawa = Misawa.new("あつしさん")
|
36
|
+
misawa.scrape ##> [{ :image => ..., :title => ..., :date => ..., :identifier => ..., :body => ... }, ...]
|
37
|
+
```
|
38
|
+
|
39
|
+
### designating page number
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
require 'Ruby4Misawa'
|
43
|
+
|
44
|
+
misawa = Misawa.new("あつしさん", 2)
|
45
|
+
misawa.scrape ##> [{ :image => ..., :title => ..., :date => ..., :identifier => ..., :body => ... }, ...]
|
46
|
+
```
|
47
|
+
|
48
|
+
## Contributing
|
49
|
+
|
50
|
+
1. fork the project.
|
51
|
+
2. create your feature branch. (`git checkout -b my-feature`)
|
52
|
+
3. commit your changes. (`git commit -am 'commit message.'`)
|
53
|
+
4. push to the branch. (`git push origin my-feature`)
|
54
|
+
5. send pull request.
|
55
|
+
|
56
|
+
## License
|
57
|
+
|
58
|
+
MIT
|
data/Ruby4Misawa.gemspec
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require File.expand_path("../lib/version", __FILE__)
|
2
|
+
|
3
|
+
Gem::Specification.new "Ruby4Misawa", Misawa::VERSION do |spec|
|
4
|
+
spec.summary = 'A scraping library for Jigoku no Misawa'
|
5
|
+
spec.homepage = 'https://github.com/namusyaka/Ruby4Misawa'
|
6
|
+
spec.authors = ['namusyaka']
|
7
|
+
spec.email = 'namusyaka@gmail.com'
|
8
|
+
spec.files = `git ls-files`.split("\n") - %w(.gitignore)
|
9
|
+
spec.test_files = spec.files.select { |path| path =~ /^spec\/.*_spec\.rb/ }
|
10
|
+
spec.license = "MIT"
|
11
|
+
|
12
|
+
spec.add_dependency('nokogiri')
|
13
|
+
end
|
data/lib/Ruby4Misawa.rb
CHANGED
@@ -1,102 +1,62 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
open-uri
|
6
|
-
kconv
|
7
|
-
optparse
|
8
|
-
rubygems
|
9
|
-
nokogiri
|
10
|
-
).each { | name | require name }
|
1
|
+
require 'open-uri'
|
2
|
+
require 'kconv'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'nokogiri'
|
11
5
|
|
12
6
|
class Misawa
|
13
|
-
|
14
|
-
class NotFoundError < ArgumentError; end;
|
15
|
-
|
16
|
-
DOMAIN = 'http://jigokuno.com/'
|
17
|
-
|
7
|
+
DOMAIN = 'http://jigokuno.com/'
|
18
8
|
@@categories = {}
|
19
9
|
|
10
|
+
class NotFoundError < ArgumentError; end
|
11
|
+
|
20
12
|
Nokogiri.HTML(open(DOMAIN).read).css('dl').each do |dl|
|
21
13
|
if dl.children[0].inner_html.toutf8 == '惚れさせ男子達'
|
22
|
-
dl.children[2].children[1].children.to_a.delete_if{|node|
|
14
|
+
dl.children[2].children[1].children.to_a.delete_if{|node|node.class == Nokogiri::XML::Text}.each do |li|
|
23
15
|
a = li.child
|
24
16
|
@@categories[a.text.scan(/(.+?)(?:\([0-9]+\))?$/)[0][0]] = a[:href].scan(/cid=([0-9]+)/)[0][0]
|
25
17
|
end
|
26
18
|
end
|
27
19
|
end
|
28
20
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
@category = @@categories[category]
|
34
|
-
@page = page =~ /^[0-9]+?\.\.[0-9]+?$/ ? eval(page) : page
|
21
|
+
def initialize(name, page = 0)
|
22
|
+
@name = name
|
23
|
+
@cid = @name.is_a?(Integer) ? @name : name_to_cid(@name)
|
24
|
+
@page = page
|
35
25
|
end
|
36
26
|
|
37
27
|
def scrape
|
38
|
-
|
39
|
-
body = get_body(uri)
|
40
|
-
Nokogiri.HTML(body).css('img.pict').map { | img | result << img['src'] } if body
|
41
|
-
result
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
class << self;
|
28
|
+
data = []
|
46
29
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
end
|
30
|
+
begin
|
31
|
+
nokogiri = Nokogiri.HTML(open(misawa_uri).read)
|
32
|
+
rescue OpenURI::HTTPError
|
33
|
+
raise NotFoundError
|
52
34
|
end
|
53
35
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
36
|
+
# parse some attributes
|
37
|
+
nokogiri.xpath('//comment()[contains(., "rdf")]').each do |entry|
|
38
|
+
attributes = Nokogiri.XML(entry.to_s.toutf8.gsub(/^<!--|-->$/, "")).child.css('rdf|Description')[0].attributes
|
39
|
+
data << %w[title date identifier].inject({}) do |result, key|
|
40
|
+
result[key.to_sym] = attributes[key].value
|
41
|
+
result
|
58
42
|
end
|
59
43
|
end
|
60
44
|
|
61
|
-
|
62
|
-
|
63
|
-
|
45
|
+
# parse images
|
46
|
+
nokogiri.css('img.pict').to_a.each_with_index { |image, i|
|
47
|
+
data[i].merge!(:image => image['src'], :body => image['alt'])
|
48
|
+
}
|
64
49
|
|
50
|
+
data
|
65
51
|
end
|
66
52
|
|
67
|
-
|
68
|
-
open(URI.encode(uri)).read
|
69
|
-
end
|
53
|
+
private
|
70
54
|
|
71
|
-
def
|
72
|
-
|
73
|
-
base = "#{DOMAIN}/?cid=#{@category}&page="
|
74
|
-
@page.map { | i | "#{base}#{i}" }
|
75
|
-
else
|
76
|
-
["#{DOMAIN}/?cid=#{@category}&page=#{@page}"]
|
77
|
-
end
|
55
|
+
def misawa_uri
|
56
|
+
"#{DOMAIN}?cid=#{@cid}&page=#{@page}"
|
78
57
|
end
|
79
58
|
|
80
|
-
|
81
|
-
|
82
|
-
if $0 === __FILE__
|
83
|
-
|
84
|
-
OptionParser.new do | opt |
|
85
|
-
|
86
|
-
arguments = {}
|
87
|
-
|
88
|
-
opt.on('-t [TYPE]', '--type [TYPE]') do | value |
|
89
|
-
arguments[:type] = value
|
90
|
-
end
|
91
|
-
opt.on('-p [PAGE]', '--page [PAGE]') do | value |
|
92
|
-
arguments[:page] = value
|
93
|
-
end
|
94
|
-
opt.on('-c [CATEGORY]', '--category [CATEGORY]') do | value |
|
95
|
-
arguments[:category] = value
|
96
|
-
end
|
97
|
-
|
98
|
-
opt.parse!(ARGV)
|
99
|
-
Misawa.send("#{arguments[:type] || 'uri'}_misawa", arguments[:category], arguments[:page])
|
59
|
+
def name_to_cid(name)
|
60
|
+
@@categories[name]
|
100
61
|
end
|
101
|
-
|
102
62
|
end
|
data/lib/version.rb
ADDED
metadata
CHANGED
@@ -1,88 +1,63 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: Ruby4Misawa
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 2
|
10
|
-
version: 0.1.2
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.4
|
11
5
|
platform: ruby
|
12
|
-
authors:
|
6
|
+
authors:
|
13
7
|
- namusyaka
|
14
8
|
autorequire:
|
15
9
|
bindir: bin
|
16
10
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
11
|
+
date: 2013-08-08 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
21
14
|
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
22
21
|
prerelease: false
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
segments:
|
30
|
-
- 0
|
31
|
-
version: "0"
|
32
|
-
type: :development
|
33
|
-
version_requirements: *id001
|
34
|
-
description: !binary |
|
35
|
-
5Zyw542E44Gu44Of44K144Ov44OW44Ot44Kw44Gr44GC44KLVVJM44GM44Go
|
36
|
-
44KM44Gf44KK44CB44Gd44Gu44G+44G+55S75YOP44KS5L+d5a2Y44GX44Gf
|
37
|
-
44KK44Gn44GN44G+44GZ44CC
|
38
|
-
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
description:
|
39
28
|
email: namusyaka@gmail.com
|
40
29
|
executables: []
|
41
|
-
|
42
30
|
extensions: []
|
43
|
-
|
44
31
|
extra_rdoc_files: []
|
45
|
-
|
46
|
-
files:
|
47
|
-
- README.rdoc
|
48
|
-
- Ruby4Misawa-0.1.2.gemspec
|
32
|
+
files:
|
49
33
|
- Gemfile
|
34
|
+
- README.md
|
35
|
+
- Ruby4Misawa.gemspec
|
50
36
|
- lib/Ruby4Misawa.rb
|
37
|
+
- lib/version.rb
|
51
38
|
homepage: https://github.com/namusyaka/Ruby4Misawa
|
52
|
-
licenses:
|
53
|
-
|
39
|
+
licenses:
|
40
|
+
- MIT
|
41
|
+
metadata: {}
|
54
42
|
post_install_message:
|
55
43
|
rdoc_options: []
|
56
|
-
|
57
|
-
require_paths:
|
44
|
+
require_paths:
|
58
45
|
- lib
|
59
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
none: false
|
70
|
-
requirements:
|
71
|
-
- - ">="
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
hash: 3
|
74
|
-
segments:
|
75
|
-
- 0
|
76
|
-
version: "0"
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - '>='
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
77
56
|
requirements: []
|
78
|
-
|
79
57
|
rubyforge_project:
|
80
|
-
rubygems_version:
|
58
|
+
rubygems_version: 2.0.2
|
81
59
|
signing_key:
|
82
|
-
specification_version:
|
83
|
-
summary:
|
84
|
-
44K544Kv44Os44Kk44OU44Oz44Kw44Op44Kk44OW44Op44OqIGZvciDlnLDn
|
85
|
-
jYTjga7jg5/jgrXjg68=
|
86
|
-
|
60
|
+
specification_version: 4
|
61
|
+
summary: A scraping library for Jigoku no Misawa
|
87
62
|
test_files: []
|
88
|
-
|
63
|
+
has_rdoc:
|
data/README.rdoc
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
= Ruby4Misawa
|
2
|
-
|
3
|
-
== これはなにか
|
4
|
-
|
5
|
-
- 地獄のミサワの画像を取ってくるやつ
|
6
|
-
- 画像はURLで出力するか、保存するかを選ぶ
|
7
|
-
|
8
|
-
== インストール
|
9
|
-
|
10
|
-
gem install Ruby4Misawa でおk
|
11
|
-
|
12
|
-
== つかいかた
|
13
|
-
|
14
|
-
libにあるRuby4Misawa.rbを適当に移動して、
|
15
|
-
ruby Ruby4Misawa.rb [オプション] [値]
|
16
|
-
|
17
|
-
[-t [uri, save]] uriは画像のURIを出力、saveは保存しまくる。permission関係でこける可能性はありけり。
|
18
|
-
[-c [カテゴリ名]] ルシフェル や バイトリーダーのように指定。一つしか指定できないので注意。
|
19
|
-
[-p [対象ページ]] 1 や 0のような数値か、0..5のようにRange形式で指定
|
20
|
-
|
21
|
-
--type, --category, --pageでも可
|
22
|
-
|
23
|
-
ちなみに何も指定しないと地獄のミサワのトップページにあるミサワ画像(と思われるもの)を取ってくる
|
24
|
-
|
data/Ruby4Misawa-0.1.2.gemspec
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
# -*- encoding: UTF-8 -*-
|
2
|
-
|
3
|
-
Gem::Specification.new do | spec |
|
4
|
-
spec.name = 'Ruby4Misawa'
|
5
|
-
spec.version = '0.1.2'
|
6
|
-
spec.summary = 'スクレイピングライブラリ for 地獄のミサワ'
|
7
|
-
spec.description = '地獄のミサワブログにあるURLがとれたり、そのまま画像を保存したりできます。'
|
8
|
-
spec.homepage = 'https://github.com/namusyaka/Ruby4Misawa'
|
9
|
-
spec.author = 'namusyaka'
|
10
|
-
spec.email = 'namusyaka@gmail.com'
|
11
|
-
spec.files = %w( README.rdoc Ruby4Misawa-0.1.2.gemspec Gemfile lib lib/Ruby4Misawa.rb )
|
12
|
-
spec.add_development_dependency('nokogiri')
|
13
|
-
end
|