Ruby4Misawa 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3b5ec5d9fb4d7d600949d79b09af92fa0af852fa
4
+ data.tar.gz: 4e387642d93550fdf83a372cfbc141ae2d8fc968
5
+ SHA512:
6
+ metadata.gz: 9fa104901863b91789f0c440a8b7ea2a5462860ab5e588887596e613a7e565a7f6863593cb2c448f3b8d5dae8e61557c27383a8b1b1b116558d5891bb1601136
7
+ data.tar.gz: 8f8104fd574b0f0a64549de8a764fd6570882c737e02a5589566d109fa6ff646df691830bdf72888362a372fd7c5f2593a8c95aeff9a5b8c55e3f81c24355f79
data/Gemfile CHANGED
@@ -1,3 +1,3 @@
1
- source :rubygems
2
- gemspec :name => 'Ruby4Misawa'
3
- gem 'nokogiri', '1.5.5'
1
+ source 'http://rubygems.org'
2
+
3
+ gemspec :name => "Ruby4Misawa"
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # Ruby4Misawa
2
+
3
+ ## About
4
+
5
+ A scraping library for 地獄のミサワ
6
+
7
+ This version is not compatible with older versions.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your Gemfile:
12
+
13
+ `gem 'Ruby4Misawa'`
14
+
15
+ or
16
+
17
+ `$ gem install Ruby4Misawa`
18
+
19
+ ## Usage
20
+
21
+ ### cid
22
+
23
+ ```ruby
24
+ require 'Ruby4Misawa'
25
+
26
+ misawa = Misawa.new(32)
27
+ misawa.scrape ##> [{ :image => ..., :title => ..., :date => ..., :identifier => ..., :body => ... }, ...]
28
+ ```
29
+
30
+ ### name
31
+
32
+ ```ruby
33
+ require 'Ruby4Misawa'
34
+
35
+ misawa = Misawa.new("あつしさん")
36
+ misawa.scrape ##> [{ :image => ..., :title => ..., :date => ..., :identifier => ..., :body => ... }, ...]
37
+ ```
38
+
39
+ ### designating page number
40
+
41
+ ```ruby
42
+ require 'Ruby4Misawa'
43
+
44
+ misawa = Misawa.new("あつしさん", 2)
45
+ misawa.scrape ##> [{ :image => ..., :title => ..., :date => ..., :identifier => ..., :body => ... }, ...]
46
+ ```
47
+
48
+ ## Contributing
49
+
50
+ 1. fork the project.
51
+ 2. create your feature branch. (`git checkout -b my-feature`)
52
+ 3. commit your changes. (`git commit -am 'commit message.'`)
53
+ 4. push to the branch. (`git push origin my-feature`)
54
+ 5. send pull request.
55
+
56
+ ## License
57
+
58
+ MIT
@@ -0,0 +1,13 @@
1
+ require File.expand_path("../lib/version", __FILE__)
2
+
3
+ Gem::Specification.new "Ruby4Misawa", Misawa::VERSION do |spec|
4
+ spec.summary = 'A scraping library for Jigoku no Misawa'
5
+ spec.homepage = 'https://github.com/namusyaka/Ruby4Misawa'
6
+ spec.authors = ['namusyaka']
7
+ spec.email = 'namusyaka@gmail.com'
8
+ spec.files = `git ls-files`.split("\n") - %w(.gitignore)
9
+ spec.test_files = spec.files.select { |path| path =~ /^spec\/.*_spec\.rb/ }
10
+ spec.license = "MIT"
11
+
12
+ spec.add_dependency('nokogiri')
13
+ end
data/lib/Ruby4Misawa.rb CHANGED
@@ -1,102 +1,62 @@
1
- # -*- encoding: UTF-8 -*-
2
-
3
- %w(
4
- uri
5
- open-uri
6
- kconv
7
- optparse
8
- rubygems
9
- nokogiri
10
- ).each { | name | require name }
1
+ require 'open-uri'
2
+ require 'kconv'
3
+ require 'rubygems'
4
+ require 'nokogiri'
11
5
 
12
6
  class Misawa
13
-
14
- class NotFoundError < ArgumentError; end;
15
-
16
- DOMAIN = 'http://jigokuno.com/'
17
-
7
+ DOMAIN = 'http://jigokuno.com/'
18
8
  @@categories = {}
19
9
 
10
+ class NotFoundError < ArgumentError; end
11
+
20
12
  Nokogiri.HTML(open(DOMAIN).read).css('dl').each do |dl|
21
13
  if dl.children[0].inner_html.toutf8 == '惚れさせ男子達'
22
- dl.children[2].children[1].children.to_a.delete_if{|node| node.class == Nokogiri::XML::Text}.each do |li|
14
+ dl.children[2].children[1].children.to_a.delete_if{|node|node.class == Nokogiri::XML::Text}.each do |li|
23
15
  a = li.child
24
16
  @@categories[a.text.scan(/(.+?)(?:\([0-9]+\))?$/)[0][0]] = a[:href].scan(/cid=([0-9]+)/)[0][0]
25
17
  end
26
18
  end
27
19
  end
28
20
 
29
- attr_reader :category, :page
30
-
31
- def initialize(category, page = 0)
32
- raise NotFoundError unless @@categories[category]
33
- @category = @@categories[category]
34
- @page = page =~ /^[0-9]+?\.\.[0-9]+?$/ ? eval(page) : page
21
+ def initialize(name, page = 0)
22
+ @name = name
23
+ @cid = @name.is_a?(Integer) ? @name : name_to_cid(@name)
24
+ @page = page
35
25
  end
36
26
 
37
27
  def scrape
38
- create_uri.inject([]) do | result, uri |
39
- body = get_body(uri)
40
- Nokogiri.HTML(body).css('img.pict').map { | img | result << img['src'] } if body
41
- result
42
- end
43
- end
44
-
45
- class << self;
28
+ data = []
46
29
 
47
- def save_misawa(category, page)
48
- misawa = self.new(category, page)
49
- misawa.scrape.each_with_index do | src, i |
50
- File.open("#{misawa.category}-#{i}.gif", 'w') { | f | f.write(misawa.get_body(src)) }
51
- end
30
+ begin
31
+ nokogiri = Nokogiri.HTML(open(misawa_uri).read)
32
+ rescue OpenURI::HTTPError
33
+ raise NotFoundError
52
34
  end
53
35
 
54
- def uri_misawa(category, page)
55
- misawa = self.new(category, page)
56
- misawa.scrape.each_with_index do | src, i |
57
- puts "#{misawa.category}-#{i} : #{src} "
36
+ # parse some attributes
37
+ nokogiri.xpath('//comment()[contains(., "rdf")]').each do |entry|
38
+ attributes = Nokogiri.XML(entry.to_s.toutf8.gsub(/^<!--|-->$/, "")).child.css('rdf|Description')[0].attributes
39
+ data << %w[title date identifier].inject({}) do |result, key|
40
+ result[key.to_sym] = attributes[key].value
41
+ result
58
42
  end
59
43
  end
60
44
 
61
- def method_missing(name, *args)
62
- raise ArgumentError;
63
- end
45
+ # parse images
46
+ nokogiri.css('img.pict').to_a.each_with_index { |image, i|
47
+ data[i].merge!(:image => image['src'], :body => image['alt'])
48
+ }
64
49
 
50
+ data
65
51
  end
66
52
 
67
- def get_body(uri)
68
- open(URI.encode(uri)).read
69
- end
53
+ private
70
54
 
71
- def create_uri
72
- if @page.is_a?(Range)
73
- base = "#{DOMAIN}/?cid=#{@category}&page="
74
- @page.map { | i | "#{base}#{i}" }
75
- else
76
- ["#{DOMAIN}/?cid=#{@category}&page=#{@page}"]
77
- end
55
+ def misawa_uri
56
+ "#{DOMAIN}?cid=#{@cid}&page=#{@page}"
78
57
  end
79
58
 
80
- end
81
-
82
- if $0 === __FILE__
83
-
84
- OptionParser.new do | opt |
85
-
86
- arguments = {}
87
-
88
- opt.on('-t [TYPE]', '--type [TYPE]') do | value |
89
- arguments[:type] = value
90
- end
91
- opt.on('-p [PAGE]', '--page [PAGE]') do | value |
92
- arguments[:page] = value
93
- end
94
- opt.on('-c [CATEGORY]', '--category [CATEGORY]') do | value |
95
- arguments[:category] = value
96
- end
97
-
98
- opt.parse!(ARGV)
99
- Misawa.send("#{arguments[:type] || 'uri'}_misawa", arguments[:category], arguments[:page])
59
+ def name_to_cid(name)
60
+ @@categories[name]
100
61
  end
101
-
102
62
  end
data/lib/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ class Misawa
2
+ VERSION = '0.1.4'
3
+ end
metadata CHANGED
@@ -1,88 +1,63 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: Ruby4Misawa
3
- version: !ruby/object:Gem::Version
4
- hash: 31
5
- prerelease:
6
- segments:
7
- - 0
8
- - 1
9
- - 2
10
- version: 0.1.2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
11
5
  platform: ruby
12
- authors:
6
+ authors:
13
7
  - namusyaka
14
8
  autorequire:
15
9
  bindir: bin
16
10
  cert_chain: []
17
-
18
- date: 2012-09-29 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
11
+ date: 2013-08-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
21
14
  name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
22
21
  prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 3
29
- segments:
30
- - 0
31
- version: "0"
32
- type: :development
33
- version_requirements: *id001
34
- description: !binary |
35
- 5Zyw542E44Gu44Of44K144Ov44OW44Ot44Kw44Gr44GC44KLVVJM44GM44Go
36
- 44KM44Gf44KK44CB44Gd44Gu44G+44G+55S75YOP44KS5L+d5a2Y44GX44Gf
37
- 44KK44Gn44GN44G+44GZ44CC
38
-
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description:
39
28
  email: namusyaka@gmail.com
40
29
  executables: []
41
-
42
30
  extensions: []
43
-
44
31
  extra_rdoc_files: []
45
-
46
- files:
47
- - README.rdoc
48
- - Ruby4Misawa-0.1.2.gemspec
32
+ files:
49
33
  - Gemfile
34
+ - README.md
35
+ - Ruby4Misawa.gemspec
50
36
  - lib/Ruby4Misawa.rb
37
+ - lib/version.rb
51
38
  homepage: https://github.com/namusyaka/Ruby4Misawa
52
- licenses: []
53
-
39
+ licenses:
40
+ - MIT
41
+ metadata: {}
54
42
  post_install_message:
55
43
  rdoc_options: []
56
-
57
- require_paths:
44
+ require_paths:
58
45
  - lib
59
- required_ruby_version: !ruby/object:Gem::Requirement
60
- none: false
61
- requirements:
62
- - - ">="
63
- - !ruby/object:Gem::Version
64
- hash: 3
65
- segments:
66
- - 0
67
- version: "0"
68
- required_rubygems_version: !ruby/object:Gem::Requirement
69
- none: false
70
- requirements:
71
- - - ">="
72
- - !ruby/object:Gem::Version
73
- hash: 3
74
- segments:
75
- - 0
76
- version: "0"
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
77
56
  requirements: []
78
-
79
57
  rubyforge_project:
80
- rubygems_version: 1.8.1
58
+ rubygems_version: 2.0.2
81
59
  signing_key:
82
- specification_version: 3
83
- summary: !binary |
84
- 44K544Kv44Os44Kk44OU44Oz44Kw44Op44Kk44OW44Op44OqIGZvciDlnLDn
85
- jYTjga7jg5/jgrXjg68=
86
-
60
+ specification_version: 4
61
+ summary: A scraping library for Jigoku no Misawa
87
62
  test_files: []
88
-
63
+ has_rdoc:
data/README.rdoc DELETED
@@ -1,24 +0,0 @@
1
- = Ruby4Misawa
2
-
3
- == これはなにか
4
-
5
- - 地獄のミサワの画像を取ってくるやつ
6
- - 画像はURLで出力するか、保存するかを選ぶ
7
-
8
- == インストール
9
-
10
- gem install Ruby4Misawa でおk
11
-
12
- == つかいかた
13
-
14
- libにあるRuby4Misawa.rbを適当に移動して、
15
- ruby Ruby4Misawa.rb [オプション] [値]
16
-
17
- [-t [uri, save]] uriは画像のURIを出力、saveは保存しまくる。permission関係でこける可能性はありけり。
18
- [-c [カテゴリ名]] ルシフェル や バイトリーダーのように指定。一つしか指定できないので注意。
19
- [-p [対象ページ]] 1 や 0のような数値か、0..5のようにRange形式で指定
20
-
21
- --type, --category, --pageでも可
22
-
23
- ちなみに何も指定しないと地獄のミサワのトップページにあるミサワ画像(と思われるもの)を取ってくる
24
-
@@ -1,13 +0,0 @@
1
- # -*- encoding: UTF-8 -*-
2
-
3
- Gem::Specification.new do | spec |
4
- spec.name = 'Ruby4Misawa'
5
- spec.version = '0.1.2'
6
- spec.summary = 'スクレイピングライブラリ for 地獄のミサワ'
7
- spec.description = '地獄のミサワブログにあるURLがとれたり、そのまま画像を保存したりできます。'
8
- spec.homepage = 'https://github.com/namusyaka/Ruby4Misawa'
9
- spec.author = 'namusyaka'
10
- spec.email = 'namusyaka@gmail.com'
11
- spec.files = %w( README.rdoc Ruby4Misawa-0.1.2.gemspec Gemfile lib lib/Ruby4Misawa.rb )
12
- spec.add_development_dependency('nokogiri')
13
- end