Ruby4Misawa 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3b5ec5d9fb4d7d600949d79b09af92fa0af852fa
4
+ data.tar.gz: 4e387642d93550fdf83a372cfbc141ae2d8fc968
5
+ SHA512:
6
+ metadata.gz: 9fa104901863b91789f0c440a8b7ea2a5462860ab5e588887596e613a7e565a7f6863593cb2c448f3b8d5dae8e61557c27383a8b1b1b116558d5891bb1601136
7
+ data.tar.gz: 8f8104fd574b0f0a64549de8a764fd6570882c737e02a5589566d109fa6ff646df691830bdf72888362a372fd7c5f2593a8c95aeff9a5b8c55e3f81c24355f79
data/Gemfile CHANGED
@@ -1,3 +1,3 @@
1
- source :rubygems
2
- gemspec :name => 'Ruby4Misawa'
3
- gem 'nokogiri', '1.5.5'
1
+ source 'http://rubygems.org'
2
+
3
+ gemspec :name => "Ruby4Misawa"
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # Ruby4Misawa
2
+
3
+ ## About
4
+
5
+ A scraping library for 地獄のミサワ
6
+
7
+ This version is not compatible with older versions.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your Gemfile.
12
+
13
+ `gem 'Ruby4Misawa'`
14
+
15
+ or
16
+
17
+ `$ gem install Ruby4Misawa`
18
+
19
+ ## Usage
20
+
21
+ ### cid
22
+
23
+ ```ruby
24
+ require 'Ruby4Misawa'
25
+
26
+ misawa = Misawa.new(32)
27
+ misawa.scrape ##> [{ :image => ..., :title => ..., :date => ..., :identifier => ..., :body => ... }, ...]
28
+ ```
29
+
30
+ ### name
31
+
32
+ ```ruby
33
+ require 'Ruby4Misawa'
34
+
35
+ misawa = Misawa.new("あつしさん")
36
+ misawa.scrape ##> [{ :image => ..., :title => ..., :date => ..., :identifier => ..., :body => ... }, ...]
37
+ ```
38
+
39
+ ### designating page number
40
+
41
+ ```ruby
42
+ require 'Ruby4Misawa'
43
+
44
+ misawa = Misawa.new("あつしさん", 2)
45
+ misawa.scrape ##> [{ :image => ..., :title => ..., :date => ..., :identifier => ..., :body => ... }, ...]
46
+ ```
47
+
48
+ ## Contributing
49
+
50
+ 1. fork the project.
51
+ 2. create your feature branch. (`git checkout -b my-feature`)
52
+ 3. commit your changes. (`git commit -am 'commit message.'`)
53
+ 4. push to the branch. (`git push origin my-feature`)
54
+ 5. send pull request.
55
+
56
+ ## License
57
+
58
+ MIT
@@ -0,0 +1,13 @@
1
+ require File.expand_path("../lib/version", __FILE__)
2
+
3
+ Gem::Specification.new "Ruby4Misawa", Misawa::VERSION do |spec|
4
+ spec.summary = 'A scraping library for Jigoku no Misawa'
5
+ spec.homepage = 'https://github.com/namusyaka/Ruby4Misawa'
6
+ spec.authors = ['namusyaka']
7
+ spec.email = 'namusyaka@gmail.com'
8
+ spec.files = `git ls-files`.split("\n") - %w(.gitignore)
9
+ spec.test_files = spec.files.select { |path| path =~ /^spec\/.*_spec\.rb/ }
10
+ spec.license = "MIT"
11
+
12
+ spec.add_dependency('nokogiri')
13
+ end
data/lib/Ruby4Misawa.rb CHANGED
@@ -1,102 +1,62 @@
1
- # -*- encoding: UTF-8 -*-
2
-
3
- %w(
4
- uri
5
- open-uri
6
- kconv
7
- optparse
8
- rubygems
9
- nokogiri
10
- ).each { | name | require name }
1
+ require 'open-uri'
2
+ require 'kconv'
3
+ require 'rubygems'
4
+ require 'nokogiri'
11
5
 
12
6
  class Misawa
13
-
14
- class NotFoundError < ArgumentError; end;
15
-
16
- DOMAIN = 'http://jigokuno.com/'
17
-
7
+ DOMAIN = 'http://jigokuno.com/'
18
8
  @@categories = {}
19
9
 
10
+ class NotFoundError < ArgumentError; end
11
+
20
12
  Nokogiri.HTML(open(DOMAIN).read).css('dl').each do |dl|
21
13
  if dl.children[0].inner_html.toutf8 == '惚れさせ男子達'
22
- dl.children[2].children[1].children.to_a.delete_if{|node| node.class == Nokogiri::XML::Text}.each do |li|
14
+ dl.children[2].children[1].children.to_a.delete_if{|node|node.class == Nokogiri::XML::Text}.each do |li|
23
15
  a = li.child
24
16
  @@categories[a.text.scan(/(.+?)(?:\([0-9]+\))?$/)[0][0]] = a[:href].scan(/cid=([0-9]+)/)[0][0]
25
17
  end
26
18
  end
27
19
  end
28
20
 
29
- attr_reader :category, :page
30
-
31
- def initialize(category, page = 0)
32
- raise NotFoundError unless @@categories[category]
33
- @category = @@categories[category]
34
- @page = page =~ /^[0-9]+?\.\.[0-9]+?$/ ? eval(page) : page
21
+ def initialize(name, page = 0)
22
+ @name = name
23
+ @cid = @name.is_a?(Integer) ? @name : name_to_cid(@name)
24
+ @page = page
35
25
  end
36
26
 
37
27
  def scrape
38
- create_uri.inject([]) do | result, uri |
39
- body = get_body(uri)
40
- Nokogiri.HTML(body).css('img.pict').map { | img | result << img['src'] } if body
41
- result
42
- end
43
- end
44
-
45
- class << self;
28
+ data = []
46
29
 
47
- def save_misawa(category, page)
48
- misawa = self.new(category, page)
49
- misawa.scrape.each_with_index do | src, i |
50
- File.open("#{misawa.category}-#{i}.gif", 'w') { | f | f.write(misawa.get_body(src)) }
51
- end
30
+ begin
31
+ nokogiri = Nokogiri.HTML(open(misawa_uri).read)
32
+ rescue OpenURI::HTTPError
33
+ raise NotFoundError
52
34
  end
53
35
 
54
- def uri_misawa(category, page)
55
- misawa = self.new(category, page)
56
- misawa.scrape.each_with_index do | src, i |
57
- puts "#{misawa.category}-#{i} : #{src} "
36
+ # parse some attributes
37
+ nokogiri.xpath('//comment()[contains(., "rdf")]').each do |entry|
38
+ attributes = Nokogiri.XML(entry.to_s.toutf8.gsub(/^<!--|-->$/, "")).child.css('rdf|Description')[0].attributes
39
+ data << %w[title date identifier].inject({}) do |result, key|
40
+ result[key.to_sym] = attributes[key].value
41
+ result
58
42
  end
59
43
  end
60
44
 
61
- def method_missing(name, *args)
62
- raise ArgumentError;
63
- end
45
+ # parse images
46
+ nokogiri.css('img.pict').to_a.each_with_index { |image, i|
47
+ data[i].merge!(:image => image['src'], :body => image['alt'])
48
+ }
64
49
 
50
+ data
65
51
  end
66
52
 
67
- def get_body(uri)
68
- open(URI.encode(uri)).read
69
- end
53
+ private
70
54
 
71
- def create_uri
72
- if @page.is_a?(Range)
73
- base = "#{DOMAIN}/?cid=#{@category}&page="
74
- @page.map { | i | "#{base}#{i}" }
75
- else
76
- ["#{DOMAIN}/?cid=#{@category}&page=#{@page}"]
77
- end
55
+ def misawa_uri
56
+ "#{DOMAIN}?cid=#{@cid}&page=#{@page}"
78
57
  end
79
58
 
80
- end
81
-
82
- if $0 === __FILE__
83
-
84
- OptionParser.new do | opt |
85
-
86
- arguments = {}
87
-
88
- opt.on('-t [TYPE]', '--type [TYPE]') do | value |
89
- arguments[:type] = value
90
- end
91
- opt.on('-p [PAGE]', '--page [PAGE]') do | value |
92
- arguments[:page] = value
93
- end
94
- opt.on('-c [CATEGORY]', '--category [CATEGORY]') do | value |
95
- arguments[:category] = value
96
- end
97
-
98
- opt.parse!(ARGV)
99
- Misawa.send("#{arguments[:type] || 'uri'}_misawa", arguments[:category], arguments[:page])
59
+ def name_to_cid(name)
60
+ @@categories[name]
100
61
  end
101
-
102
62
  end
data/lib/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ class Misawa
2
+ VERSION = '0.1.4'
3
+ end
metadata CHANGED
@@ -1,88 +1,63 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: Ruby4Misawa
3
- version: !ruby/object:Gem::Version
4
- hash: 31
5
- prerelease:
6
- segments:
7
- - 0
8
- - 1
9
- - 2
10
- version: 0.1.2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
11
5
  platform: ruby
12
- authors:
6
+ authors:
13
7
  - namusyaka
14
8
  autorequire:
15
9
  bindir: bin
16
10
  cert_chain: []
17
-
18
- date: 2012-09-29 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
11
+ date: 2013-08-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
21
14
  name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
22
21
  prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 3
29
- segments:
30
- - 0
31
- version: "0"
32
- type: :development
33
- version_requirements: *id001
34
- description: !binary |
35
- 5Zyw542E44Gu44Of44K144Ov44OW44Ot44Kw44Gr44GC44KLVVJM44GM44Go
36
- 44KM44Gf44KK44CB44Gd44Gu44G+44G+55S75YOP44KS5L+d5a2Y44GX44Gf
37
- 44KK44Gn44GN44G+44GZ44CC
38
-
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description:
39
28
  email: namusyaka@gmail.com
40
29
  executables: []
41
-
42
30
  extensions: []
43
-
44
31
  extra_rdoc_files: []
45
-
46
- files:
47
- - README.rdoc
48
- - Ruby4Misawa-0.1.2.gemspec
32
+ files:
49
33
  - Gemfile
34
+ - README.md
35
+ - Ruby4Misawa.gemspec
50
36
  - lib/Ruby4Misawa.rb
37
+ - lib/version.rb
51
38
  homepage: https://github.com/namusyaka/Ruby4Misawa
52
- licenses: []
53
-
39
+ licenses:
40
+ - MIT
41
+ metadata: {}
54
42
  post_install_message:
55
43
  rdoc_options: []
56
-
57
- require_paths:
44
+ require_paths:
58
45
  - lib
59
- required_ruby_version: !ruby/object:Gem::Requirement
60
- none: false
61
- requirements:
62
- - - ">="
63
- - !ruby/object:Gem::Version
64
- hash: 3
65
- segments:
66
- - 0
67
- version: "0"
68
- required_rubygems_version: !ruby/object:Gem::Requirement
69
- none: false
70
- requirements:
71
- - - ">="
72
- - !ruby/object:Gem::Version
73
- hash: 3
74
- segments:
75
- - 0
76
- version: "0"
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
77
56
  requirements: []
78
-
79
57
  rubyforge_project:
80
- rubygems_version: 1.8.1
58
+ rubygems_version: 2.0.2
81
59
  signing_key:
82
- specification_version: 3
83
- summary: !binary |
84
- 44K544Kv44Os44Kk44OU44Oz44Kw44Op44Kk44OW44Op44OqIGZvciDlnLDn
85
- jYTjga7jg5/jgrXjg68=
86
-
60
+ specification_version: 4
61
+ summary: A scraping library for Jigoku no Misawa
87
62
  test_files: []
88
-
63
+ has_rdoc:
data/README.rdoc DELETED
@@ -1,24 +0,0 @@
1
- = Ruby4Misawa
2
-
3
- == これはなにか
4
-
5
- - 地獄のミサワの画像を取ってくるやつ
6
- - 画像はURLで出力するか、保存するかを選ぶ
7
-
8
- == インストール
9
-
10
- gem install Ruby4Misawa でおk
11
-
12
- == つかいかた
13
-
14
- libにあるRuby4Misawa.rbを適当に移動して、
15
- ruby Ruby4Misawa.rb [オプション] [値]
16
-
17
- [-t [uri, save]] uriは画像のURIを出力、saveは保存しまくる。permission関係でこける可能性はありけり。
18
- [-c [カテゴリ名]] ルシフェル や バイトリーダーのように指定。一つしか指定できないので注意。
19
- [-p [対象ページ]] 1 や 0のような数値か、0..5のようにRange形式で指定
20
-
21
- --type, --category, --pageでも可
22
-
23
- ちなみに何も指定しないと地獄のミサワのトップページにあるミサワ画像(と思われるもの)を取ってくる
24
-
@@ -1,13 +0,0 @@
1
- # -*- encoding: UTF-8 -*-
2
-
3
- Gem::Specification.new do | spec |
4
- spec.name = 'Ruby4Misawa'
5
- spec.version = '0.1.2'
6
- spec.summary = 'スクレイピングライブラリ for 地獄のミサワ'
7
- spec.description = '地獄のミサワブログにあるURLがとれたり、そのまま画像を保存したりできます。'
8
- spec.homepage = 'https://github.com/namusyaka/Ruby4Misawa'
9
- spec.author = 'namusyaka'
10
- spec.email = 'namusyaka@gmail.com'
11
- spec.files = %w( README.rdoc Ruby4Misawa-0.1.2.gemspec Gemfile lib lib/Ruby4Misawa.rb )
12
- spec.add_development_dependency('nokogiri')
13
- end