sosowa 0.0.2 → 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +6 -0
- data/README.md +8 -0
- data/lib/sosowa/parser.rb +12 -1
- data/lib/sosowa/scheme.rb +6 -1
- data/lib/sosowa/version.rb +2 -2
- data/lib/sosowa.rb +21 -3
- data/samples/token_segment.rb +35 -0
- data/sosowa.gemspec +1 -1
- metadata +7 -5
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -2,6 +2,11 @@
|
|
2
2
|
|
3
3
|
Sosowa Parser for Ruby 1.9.x
|
4
4
|
|
5
|
+
## Requirements
|
6
|
+
|
7
|
+
* Ruby 1.9.x
|
8
|
+
* mechanize gem
|
9
|
+
|
5
10
|
## Installation
|
6
11
|
|
7
12
|
gem install sosowa
|
@@ -17,6 +22,9 @@ Sosowa Parser for Ruby 1.9.x
|
|
17
22
|
# 作品集番号156の1320873807を持ってくる
|
18
23
|
novel = Sosowa.get(:log => 156, :key => 1320873807)
|
19
24
|
puts novel.text
|
25
|
+
|
26
|
+
# "ナズーリン"がタイトルに含まれているSSの数を出力する
|
27
|
+
puts Sosowa.search("ナズーリン", :type => :title).size
|
20
28
|
|
21
29
|
## Contributing
|
22
30
|
|
data/lib/sosowa/parser.rb
CHANGED
@@ -6,8 +6,18 @@ module Sosowa
|
|
6
6
|
@agent.user_agent = "Sosowa Ruby #{Sosowa::VERSION}"
|
7
7
|
end
|
8
8
|
|
9
|
+
# Runs a search against the site and returns the parsed result index.
#
# @param query [String] search text; converted with +tosjis+ before it is sent
# @param args [Hash] options; +:type+ selects the search type and falls back
#   to +:insubject+ when absent
# @return the value produced by +parse_index+ for the search result page
def search(query, args={})
  search_type = args[:type] || :insubject
  query_string = Sosowa.serialize_parameter(
    :mode => :search,
    :type => search_type,
    :query => query.tosjis
  )
  parse_index(URI.join(Sosowa::BASE_URL, query_string))
end
|
13
|
+
|
9
14
|
# Fetches the index page of a single log and parses it.
#
# @param log the log number, passed through as the +log+ query parameter
# @return the value produced by +parse_index+ for that page
def fetch_index(log)
  index_url = URI.join(Sosowa::BASE_URL, Sosowa.serialize_parameter(:log => log))
  parse_index(index_url)
end
|
18
|
+
|
19
|
+
def parse_index(url)
|
20
|
+
page = @agent.get(url)
|
11
21
|
indexes = []
|
12
22
|
tr = page.search("tr")
|
13
23
|
tr = tr[1, tr.size-1]
|
@@ -19,6 +29,7 @@ module Sosowa
|
|
19
29
|
else
|
20
30
|
title = tr.search(%{td[@class="title cell_title"] > a}).inner_html.to_s.toutf8.strip
|
21
31
|
tags = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["title"].value.split(" / ")
|
32
|
+
log = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["href"].value.gsub(/log=(\d+)$/, '\1').to_i
|
22
33
|
key = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["href"].value.gsub(/^.+key=(.+?)&.+$/, '\1').to_i
|
23
34
|
author = tr.search(%{td[@class="cell_author"]}).inner_html.to_s.toutf8.strip
|
24
35
|
created_at = Time.parse(tr.search(%{td[@class="cell_created"]}).inner_html.to_s.toutf8.strip)
|
data/lib/sosowa/scheme.rb
CHANGED
@@ -22,7 +22,8 @@ module Sosowa
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def fetch(log, key)
|
25
|
-
|
25
|
+
params = Sosowa.serialize_parameter({:mode => :read, :log => log, :key => key})
|
26
|
+
@page = @agent.get(URI.join(Sosowa::BASE_URL, params))
|
26
27
|
tags = (@page/%{dl[@class="info"][1] > dd > a}).map{|t| t.inner_html.to_s.toutf8 }
|
27
28
|
text = (@page/%{div[@class="contents ss"]})[0].inner_html.to_s.toutf8
|
28
29
|
ps = (@page/%{div[@class="aft"]})[0].inner_html.to_s.toutf8
|
@@ -78,6 +79,10 @@ module Sosowa
|
|
78
79
|
form.field_with(:name => "point").option_with(:value => (params[:point].to_s || "0")).select
|
79
80
|
form.click_button
|
80
81
|
end
|
82
|
+
|
83
|
+
# Returns the stored text with every "<br>" tag and line break removed.
#
# @return [String] a copy of +@element[:text]+ without "<br>", "\r\n" or "\n"
def plain
  body = @element[:text]
  body.gsub(/(<br>|\r?\n)/, "")
end
|
81
86
|
end
|
82
87
|
|
83
88
|
class Comment < Scheme
|
data/lib/sosowa/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Sosowa
|
2
|
-
VERSION = "0.0.2"
|
3
|
-
end
|
2
|
+
VERSION = "0.1"
|
3
|
+
end
|
data/lib/sosowa.rb
CHANGED
@@ -1,14 +1,27 @@
|
|
1
|
-
$LOAD_PATH.unshift(File.expand_path("../", __FILE__))
|
2
1
|
require "kconv"
|
3
2
|
require "mechanize"
|
4
3
|
require "time"
|
5
|
-
require "
|
4
|
+
require "uri"
|
5
|
+
|
6
|
+
$LOAD_PATH.unshift(File.expand_path("../", __FILE__))
|
6
7
|
require "sosowa/version"
|
7
8
|
require "sosowa/scheme"
|
8
9
|
require "sosowa/parser"
|
9
10
|
|
10
11
|
module Sosowa
|
11
|
-
BASE_URL = "http://coolier.sytes.net:8080/sosowa/ssw_l"
|
12
|
+
BASE_URL = "http://coolier.sytes.net:8080/sosowa/ssw_l/"
|
13
|
+
|
14
|
+
# Serializes a Hash of request parameters into a URL query string.
#
# Keys are normalized with +to_sym+ (so "log" and :log collapse into a single
# entry, the later one winning) and values with +to_s+. Keys and values are
# percent-encoded, so reserved characters such as "&", "=" and "#" inside a
# value cannot corrupt the query. Spaces are emitted as "%20".
#
# @param [Hash] parameter
# @return [String] URL Serialized parameters prefixed with "?", or "" when
#   +parameter+ is not a Hash or has no entries
def self.serialize_parameter(parameter)
  return "" unless parameter.is_a?(Hash)

  normalized = {}
  parameter.each { |key, value| normalized[key.to_sym] = value.to_s }
  return "" if normalized.empty?

  # URI.escape was removed in Ruby 3.0 and never escaped reserved characters.
  # encode_www_form_component writes a space as "+", so it is rewritten to
  # "%20", the form the previous implementation produced for spaces.
  escape = lambda { |text| URI.encode_www_form_component(text).gsub("+", "%20") }
  pairs = normalized.map { |key, value| "#{escape.call(key.to_s)}=#{escape.call(value)}" }
  "?" + pairs.join("&")
end
|
12
25
|
|
13
26
|
def self.get(args={})
|
14
27
|
args[:log] ||= 0
|
@@ -19,4 +32,9 @@ module Sosowa
|
|
19
32
|
parser.fetch_index(args[:log])
|
20
33
|
end
|
21
34
|
end
|
35
|
+
|
36
|
+
# Module-level shortcut that runs a search through a fresh Parser.
#
# @param query the search text, forwarded unchanged to Parser#search
# @param args [Hash] options, forwarded unchanged to Parser#search
# @return the value returned by Parser#search
def self.search(query, args={})
  Parser.new.search(query, args)
end
|
22
40
|
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
# 創想話の最新版から適当なSSを取得してMeCab(+ 東方MeCab辞書)を用いてトークナイズします。
|
4
|
+
|
5
|
+
require "MeCab"
|
6
|
+
require "sosowa"
|
7
|
+
|
8
|
+
# Reopens MeCab::Tagger so that parseToNode returns an Array of nodes instead
# of the head of a linked list.
module MeCab
  class Tagger
    # Keep the original implementation reachable under a private alias.
    alias_method :parseToNode_org, :parseToNode
    private :parseToNode_org

    # Walks the node chain returned by the original parseToNode via +next+
    # and collects every node, then drops the first and the last one.
    #
    # @return [Array, nil] the collected nodes without the first and last
    #   entry; nil when fewer than two nodes were collected
    def parseToNode(*args)
      collected = []
      cursor = parseToNode_org(*args)
      while cursor
        collected << cursor
        cursor = cursor.next
      end
      collected[1, collected.size - 2]
    end
  end
end
|
24
|
+
|
25
|
+
# Download the thdic-mecab user dictionary into the current directory.
puts "Fetching thdic-mecab..."
# -f makes curl exit non-zero on an HTTP error instead of saving the error page
# as the dictionary. The argument-list form of system bypasses the shell.
fetched = system("curl", "-fL", "-o", "thdic-mecab.dic",
                 "https://github.com/oame/thdic-mecab/raw/master/pkg/thdic-mecab.dic")
# Stop here rather than handing MeCab a missing or broken dictionary file.
abort "Failed to download thdic-mecab.dic" unless fetched

puts "Done. Initialize MeCab::Tagger"
mecab = MeCab::Tagger.new("-u thdic-mecab.dic")

# Pick one random entry from the default index, fetch it, and strip "<br>" tags
# and line breaks from its text before tokenizing.
text = Sosowa.get.sample.fetch.text.gsub(/(<br>|\r?\n)/, "")
tokens = mecab.parseToNode(text)
# Print the feature string of each token.
tokens.each do |token|
  puts token.feature
end
|
data/sosowa.gemspec
CHANGED
@@ -5,7 +5,7 @@ Gem::Specification.new do |gem|
|
|
5
5
|
gem.authors = ["Oame"]
|
6
6
|
gem.email = ["oame@oameya.com"]
|
7
7
|
gem.description = %q{Sosowa Parser for Ruby}
|
8
|
-
gem.summary = %q{Sosowa Parser for Ruby.}
|
8
|
+
gem.summary = %q{Sosowa Parser for Ruby 1.9.x.}
|
9
9
|
gem.homepage = ""
|
10
10
|
|
11
11
|
gem.files = `git ls-files`.split($\)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sosowa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: '0.1'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-10 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|
16
|
-
requirement: &
|
16
|
+
requirement: &70307433904340 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70307433904340
|
25
25
|
description: Sosowa Parser for Ruby
|
26
26
|
email:
|
27
27
|
- oame@oameya.com
|
@@ -30,6 +30,7 @@ extensions: []
|
|
30
30
|
extra_rdoc_files: []
|
31
31
|
files:
|
32
32
|
- .gitignore
|
33
|
+
- CHANGELOG.md
|
33
34
|
- Gemfile
|
34
35
|
- LICENSE
|
35
36
|
- README.md
|
@@ -38,6 +39,7 @@ files:
|
|
38
39
|
- lib/sosowa/parser.rb
|
39
40
|
- lib/sosowa/scheme.rb
|
40
41
|
- lib/sosowa/version.rb
|
42
|
+
- samples/token_segment.rb
|
41
43
|
- sosowa.gemspec
|
42
44
|
homepage: ''
|
43
45
|
licenses: []
|
@@ -62,6 +64,6 @@ rubyforge_project:
|
|
62
64
|
rubygems_version: 1.8.10
|
63
65
|
signing_key:
|
64
66
|
specification_version: 3
|
65
|
-
summary: Sosowa Parser for Ruby.
|
67
|
+
summary: Sosowa Parser for Ruby 1.9.x.
|
66
68
|
test_files: []
|
67
69
|
has_rdoc:
|