sosowa 0.0.2 → 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +6 -0
- data/README.md +8 -0
- data/lib/sosowa/parser.rb +12 -1
- data/lib/sosowa/scheme.rb +6 -1
- data/lib/sosowa/version.rb +2 -2
- data/lib/sosowa.rb +21 -3
- data/samples/token_segment.rb +35 -0
- data/sosowa.gemspec +1 -1
- metadata +7 -5
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -2,6 +2,11 @@
|
|
2
2
|
|
3
3
|
Sosowa Parser for Ruby 1.9.x
|
4
4
|
|
5
|
+
## Requirements
|
6
|
+
|
7
|
+
* Ruby 1.9.x
|
8
|
+
* mechanize gem
|
9
|
+
|
5
10
|
## Installation
|
6
11
|
|
7
12
|
gem install sosowa
|
@@ -17,6 +22,9 @@ Sosowa Parser for Ruby 1.9.x
|
|
17
22
|
# 作品集番号156の1320873807を持ってくる
|
18
23
|
novel = Sosowa.get(:log => 156, :key => 1320873807)
|
19
24
|
puts novel.text
|
25
|
+
|
26
|
+
# "ナズーリン"がタイトルに含まれているSSの数を出力する
|
27
|
+
puts Sosowa.search("ナズーリン", :type => :title).size
|
20
28
|
|
21
29
|
## Contributing
|
22
30
|
|
data/lib/sosowa/parser.rb
CHANGED
@@ -6,8 +6,18 @@ module Sosowa
|
|
6
6
|
@agent.user_agent = "Sosowa Ruby #{Sosowa::VERSION}"
|
7
7
|
end
|
8
8
|
|
9
|
+
def search(query, args={})
|
10
|
+
params = Sosowa.serialize_parameter({:mode => :search, :type => (args[:type] ? args[:type] : :insubject), :query => query.tosjis})
|
11
|
+
parse_index(URI.join(Sosowa::BASE_URL, params))
|
12
|
+
end
|
13
|
+
|
9
14
|
def fetch_index(log)
|
10
|
-
|
15
|
+
params = Sosowa.serialize_parameter({:log => log})
|
16
|
+
parse_index(URI.join(Sosowa::BASE_URL, params))
|
17
|
+
end
|
18
|
+
|
19
|
+
def parse_index(url)
|
20
|
+
page = @agent.get(url)
|
11
21
|
indexes = []
|
12
22
|
tr = page.search("tr")
|
13
23
|
tr = tr[1, tr.size-1]
|
@@ -19,6 +29,7 @@ module Sosowa
|
|
19
29
|
else
|
20
30
|
title = tr.search(%{td[@class="title cell_title"] > a}).inner_html.to_s.toutf8.strip
|
21
31
|
tags = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["title"].value.split(" / ")
|
32
|
+
log = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["href"].value.gsub(/log=(\d+)$/, '\1').to_i
|
22
33
|
key = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["href"].value.gsub(/^.+key=(.+?)&.+$/, '\1').to_i
|
23
34
|
author = tr.search(%{td[@class="cell_author"]}).inner_html.to_s.toutf8.strip
|
24
35
|
created_at = Time.parse(tr.search(%{td[@class="cell_created"]}).inner_html.to_s.toutf8.strip)
|
data/lib/sosowa/scheme.rb
CHANGED
@@ -22,7 +22,8 @@ module Sosowa
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def fetch(log, key)
|
25
|
-
|
25
|
+
params = Sosowa.serialize_parameter({:mode => :read, :log => log, :key => key})
|
26
|
+
@page = @agent.get(URI.join(Sosowa::BASE_URL, params))
|
26
27
|
tags = (@page/%{dl[@class="info"][1] > dd > a}).map{|t| t.inner_html.to_s.toutf8 }
|
27
28
|
text = (@page/%{div[@class="contents ss"]})[0].inner_html.to_s.toutf8
|
28
29
|
ps = (@page/%{div[@class="aft"]})[0].inner_html.to_s.toutf8
|
@@ -78,6 +79,10 @@ module Sosowa
|
|
78
79
|
form.field_with(:name => "point").option_with(:value => (params[:point].to_s || "0")).select
|
79
80
|
form.click_button
|
80
81
|
end
|
82
|
+
|
83
|
+
def plain
|
84
|
+
return @element[:text].gsub(/(<br>|\r?\n)/, "")
|
85
|
+
end
|
81
86
|
end
|
82
87
|
|
83
88
|
class Comment < Scheme
|
data/lib/sosowa/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Sosowa
|
2
|
-
VERSION = "0.0.2"
|
3
|
-
end
|
2
|
+
VERSION = "0.1"
|
3
|
+
end
|
data/lib/sosowa.rb
CHANGED
@@ -1,14 +1,27 @@
|
|
1
|
-
$LOAD_PATH.unshift(File.expand_path("../", __FILE__))
|
2
1
|
require "kconv"
|
3
2
|
require "mechanize"
|
4
3
|
require "time"
|
5
|
-
require "
|
4
|
+
require "uri"
|
5
|
+
|
6
|
+
$LOAD_PATH.unshift(File.expand_path("../", __FILE__))
|
6
7
|
require "sosowa/version"
|
7
8
|
require "sosowa/scheme"
|
8
9
|
require "sosowa/parser"
|
9
10
|
|
10
11
|
module Sosowa
|
11
|
-
BASE_URL = "http://coolier.sytes.net:8080/sosowa/ssw_l"
|
12
|
+
BASE_URL = "http://coolier.sytes.net:8080/sosowa/ssw_l/"
|
13
|
+
|
14
|
+
# @param [Hash] parameter
|
15
|
+
# @return [String] URL Serialized parameters
|
16
|
+
def self.serialize_parameter parameter
|
17
|
+
return "" unless parameter.class == Hash
|
18
|
+
ant = Hash.new
|
19
|
+
parameter.each do |key, value|
|
20
|
+
ant[key.to_sym] = value.to_s
|
21
|
+
end
|
22
|
+
param = ant.inject(""){|k,v|k+"&#{v[0]}=#{URI.escape(v[1])}"}.sub!(/^&/,"?")
|
23
|
+
return param ? param : ""
|
24
|
+
end
|
12
25
|
|
13
26
|
def self.get(args={})
|
14
27
|
args[:log] ||= 0
|
@@ -19,4 +32,9 @@ module Sosowa
|
|
19
32
|
parser.fetch_index(args[:log])
|
20
33
|
end
|
21
34
|
end
|
35
|
+
|
36
|
+
def self.search(query, args={})
|
37
|
+
parser = Parser.new
|
38
|
+
parser.search(query, args)
|
39
|
+
end
|
22
40
|
end
|
data/samples/token_segment.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
# 創想話の最新版から適当なSSを取得してMeCab(+ 東方MeCab辞書)を用いてトークナイズします。
|
4
|
+
|
5
|
+
require "MeCab"
|
6
|
+
require "sosowa"
|
7
|
+
|
8
|
+
module MeCab
|
9
|
+
class Tagger
|
10
|
+
alias_method :parseToNode_org, :parseToNode
|
11
|
+
private :parseToNode_org
|
12
|
+
|
13
|
+
def parseToNode(*args)
|
14
|
+
node = parseToNode_org(*args)
|
15
|
+
nodes = []
|
16
|
+
while node
|
17
|
+
nodes.push(node)
|
18
|
+
node = node.next
|
19
|
+
end
|
20
|
+
return nodes[1, nodes.size - 2]
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
puts "Fetching thdic-mecab..."
|
26
|
+
system("curl -L https://github.com/oame/thdic-mecab/raw/master/pkg/thdic-mecab.dic > thdic-mecab.dic")
|
27
|
+
|
28
|
+
puts "Done. Initialize MeCab::Tagger"
|
29
|
+
mecab = MeCab::Tagger.new("-u thdic-mecab.dic")
|
30
|
+
|
31
|
+
text = Sosowa.get.sample.fetch.text.gsub(/(<br>|\r?\n)/, "")
|
32
|
+
tokens = mecab.parseToNode(text)
|
33
|
+
tokens.each do |token|
|
34
|
+
puts token.feature
|
35
|
+
end
|
data/sosowa.gemspec
CHANGED
@@ -5,7 +5,7 @@ Gem::Specification.new do |gem|
|
|
5
5
|
gem.authors = ["Oame"]
|
6
6
|
gem.email = ["oame@oameya.com"]
|
7
7
|
gem.description = %q{Sosowa Parser for Ruby}
|
8
|
-
gem.summary = %q{Sosowa Parser for Ruby.}
|
8
|
+
gem.summary = %q{Sosowa Parser for Ruby 1.9.x.}
|
9
9
|
gem.homepage = ""
|
10
10
|
|
11
11
|
gem.files = `git ls-files`.split($\)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sosowa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: '0.1'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-10 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|
16
|
-
requirement: &
|
16
|
+
requirement: &70307433904340 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70307433904340
|
25
25
|
description: Sosowa Parser for Ruby
|
26
26
|
email:
|
27
27
|
- oame@oameya.com
|
@@ -30,6 +30,7 @@ extensions: []
|
|
30
30
|
extra_rdoc_files: []
|
31
31
|
files:
|
32
32
|
- .gitignore
|
33
|
+
- CHANGELOG.md
|
33
34
|
- Gemfile
|
34
35
|
- LICENSE
|
35
36
|
- README.md
|
@@ -38,6 +39,7 @@ files:
|
|
38
39
|
- lib/sosowa/parser.rb
|
39
40
|
- lib/sosowa/scheme.rb
|
40
41
|
- lib/sosowa/version.rb
|
42
|
+
- samples/token_segment.rb
|
41
43
|
- sosowa.gemspec
|
42
44
|
homepage: ''
|
43
45
|
licenses: []
|
@@ -62,6 +64,6 @@ rubyforge_project:
|
|
62
64
|
rubygems_version: 1.8.10
|
63
65
|
signing_key:
|
64
66
|
specification_version: 3
|
65
|
-
summary: Sosowa Parser for Ruby.
|
67
|
+
summary: Sosowa Parser for Ruby 1.9.x.
|
66
68
|
test_files: []
|
67
69
|
has_rdoc:
|