sosowa 0.0.2 → 0.1

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md ADDED
@@ -0,0 +1,6 @@
1
+ ## 0.1
2
+ * Sosowa#searchを追加
3
+ * Sosowa::Novel#plainを使って<br>タグや改行コードが取り除かれたテキストを得ることが出来ます。
4
+
5
+ ## 0.0.2
6
+ * 最初のリリースです!
data/README.md CHANGED
@@ -2,6 +2,11 @@
2
2
 
3
3
  Sosowa Parser for Ruby 1.9.x
4
4
 
5
+ ## Requirements
6
+
7
+ * Ruby 1.9.x
8
+ * mechanize gem
9
+
5
10
  ## Installation
6
11
 
7
12
  gem install sosowa
@@ -17,6 +22,9 @@ Sosowa Parser for Ruby 1.9.x
17
22
  # 作品集番号156の1320873807を持ってくる
18
23
  novel = Sosowa.get(:log => 156, :key => 1320873807)
19
24
  puts novel.text
25
+
26
+ # "ナズーリン"がタイトルに含まれているSSの数を出力する
27
+ puts Sosowa.search("ナズーリン", :type => :title).size
20
28
 
21
29
  ## Contributing
22
30
 
data/lib/sosowa/parser.rb CHANGED
@@ -6,8 +6,18 @@ module Sosowa
6
6
  @agent.user_agent = "Sosowa Ruby #{Sosowa::VERSION}"
7
7
  end
8
8
 
9
# Runs a search request and parses the response through #parse_index.
#
# @param query [String] search text; converted with #tosjis before it is sent
# @param args [Hash] options; :type selects the search type and falls back
#   to :insubject when it is missing or nil
# @return whatever #parse_index returns for the generated search URL
def search(query, args={})
  search_type = args[:type] || :insubject
  query_string = Sosowa.serialize_parameter(
    :mode => :search,
    :type => search_type,
    :query => query.tosjis
  )
  parse_index(URI.join(Sosowa::BASE_URL, query_string))
end
13
+
9
14
  def fetch_index(log)
10
- page = @agent.get("#{Sosowa::BASE_URL}/?log=#{log}")
15
+ params = Sosowa.serialize_parameter({:log => log})
16
+ parse_index(URI.join(Sosowa::BASE_URL, params))
17
+ end
18
+
19
+ def parse_index(url)
20
+ page = @agent.get(url)
11
21
  indexes = []
12
22
  tr = page.search("tr")
13
23
  tr = tr[1, tr.size-1]
@@ -19,6 +29,7 @@ module Sosowa
19
29
  else
20
30
  title = tr.search(%{td[@class="title cell_title"] > a}).inner_html.to_s.toutf8.strip
21
31
  tags = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["title"].value.split(" / ")
32
+ log = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["href"].value.gsub(/log=(\d+)$/, '\1').to_i
22
33
  key = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["href"].value.gsub(/^.+key=(.+?)&.+$/, '\1').to_i
23
34
  author = tr.search(%{td[@class="cell_author"]}).inner_html.to_s.toutf8.strip
24
35
  created_at = Time.parse(tr.search(%{td[@class="cell_created"]}).inner_html.to_s.toutf8.strip)
data/lib/sosowa/scheme.rb CHANGED
@@ -22,7 +22,8 @@ module Sosowa
22
22
  end
23
23
 
24
24
  def fetch(log, key)
25
- @page = @agent.get("#{Sosowa::BASE_URL}/?mode=read&log=#{log}&key=#{key}")
25
+ params = Sosowa.serialize_parameter({:mode => :read, :log => log, :key => key})
26
+ @page = @agent.get(URI.join(Sosowa::BASE_URL, params))
26
27
  tags = (@page/%{dl[@class="info"][1] > dd > a}).map{|t| t.inner_html.to_s.toutf8 }
27
28
  text = (@page/%{div[@class="contents ss"]})[0].inner_html.to_s.toutf8
28
29
  ps = (@page/%{div[@class="aft"]})[0].inner_html.to_s.toutf8
@@ -78,6 +79,10 @@ module Sosowa
78
79
  form.field_with(:name => "point").option_with(:value => (params[:point].to_s || "0")).select
79
80
  form.click_button
80
81
  end
82
+
83
# Returns the stored :text element with every "<br>" tag and every line
# break (LF or CRLF) deleted.
#
# @return [String] a new string; @element[:text] itself is left untouched
def plain
  @element[:text].gsub(/<br>|\r?\n/, "")
end
81
86
  end
82
87
 
83
88
  class Comment < Scheme
@@ -1,3 +1,3 @@
1
1
  module Sosowa
2
- VERSION = "0.0.2"
3
- end
2
+ VERSION = "0.1"
3
+ end
data/lib/sosowa.rb CHANGED
@@ -1,14 +1,27 @@
1
- $LOAD_PATH.unshift(File.expand_path("../", __FILE__))
2
1
  require "kconv"
3
2
  require "mechanize"
4
3
  require "time"
5
- require "cgi"
4
+ require "uri"
5
+
6
+ $LOAD_PATH.unshift(File.expand_path("../", __FILE__))
6
7
  require "sosowa/version"
7
8
  require "sosowa/scheme"
8
9
  require "sosowa/parser"
9
10
 
10
11
  module Sosowa
11
- BASE_URL = "http://coolier.sytes.net:8080/sosowa/ssw_l"
12
+ BASE_URL = "http://coolier.sytes.net:8080/sosowa/ssw_l/"
13
+
14
# Builds a URL query string ("?key=value&key=value") from a Hash.
#
# Keys are normalized with #to_sym (so "a" and :a collapse into one entry,
# the later value winning) and values are converted with #to_s. Each value
# is percent-encoded byte by byte, so reserved characters such as "&", "="
# and "#" inside a value cannot split or truncate the query.
#
# @param [Hash] parameter query parameters
# @return [String] the query string including its leading "?", or "" when
#   +parameter+ is not a Hash or contains no entries
def self.serialize_parameter(parameter)
  return "" unless parameter.is_a?(Hash)

  normalized = {}
  parameter.each { |key, value| normalized[key.to_sym] = value.to_s }
  return "" if normalized.empty?

  pairs = normalized.map do |key, value|
    # URI.escape no longer exists on Ruby 3.0+ and never escaped reserved
    # characters. encode_www_form_component writes a space as "+", so it is
    # turned back into "%20" to keep the form the old code produced
    # (a literal "+" is already emitted as "%2B", so this is unambiguous).
    escaped = URI.encode_www_form_component(value).gsub("+", "%20")
    "#{key}=#{escaped}"
  end
  "?" + pairs.join("&")
end
12
25
 
13
26
  def self.get(args={})
14
27
  args[:log] ||= 0
@@ -19,4 +32,9 @@ module Sosowa
19
32
  parser.fetch_index(args[:log])
20
33
  end
21
34
  end
35
+
36
# Convenience entry point: runs a search on a freshly created Parser.
#
# @param query [String] search text, handed to Parser#search unchanged
# @param args [Hash] options, handed to Parser#search unchanged
# @return the result of Parser#search
def self.search(query, args={})
  Parser.new.search(query, args)
end
22
40
  end
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+ # coding: utf-8
3
+ # 創想話の最新版から適当なSSを取得してMeCab(+ 東方MeCab辞書)を用いてトークナイズします。
4
+
5
+ require "MeCab"
6
+ require "sosowa"
7
+
8
# Reopens MeCab::Tagger so that #parseToNode returns an Array of nodes
# instead of the head of a linked list.
module MeCab
  class Tagger
    # Keep the original implementation reachable under a private name.
    alias_method :parseToNode_org, :parseToNode
    private :parseToNode_org

    # Follows the #next links starting from the node returned by the original
    # #parseToNode and collects every node along the way.
    #
    # @param args arguments forwarded unchanged to the original #parseToNode
    # @return [Array, nil] the collected nodes without the first and the last
    #   one (presumably MeCab's BOS/EOS sentinel nodes — confirm against the
    #   binding); nil when fewer than two nodes were collected
    def parseToNode(*args)
      collected = []
      cursor = parseToNode_org(*args)
      while cursor
        collected << cursor
        cursor = cursor.next
      end
      collected.slice(1, collected.length - 2)
    end
  end
end
24
+
25
# Sample flow: download the user dictionary, pick one entry from the latest
# index, and print the MeCab feature string of every token in its text.
dictionary_url = "https://github.com/oame/thdic-mecab/raw/master/pkg/thdic-mecab.dic"
dictionary_file = "thdic-mecab.dic"

puts "Fetching thdic-mecab..."
# -f makes curl exit non-zero on HTTP errors instead of saving the error page
# as the dictionary. The argument-list form of system avoids the shell, and
# its result is checked so a failed download stops the script here rather
# than surfacing later inside MeCab.
unless system("curl", "-fL", "-o", dictionary_file, dictionary_url)
  abort "Failed to download #{dictionary_file}"
end

puts "Done. Initialize MeCab::Tagger"
mecab = MeCab::Tagger.new("-u #{dictionary_file}")

# NOTE(review): this duplicates the substitution done by the #plain method in
# lib/sosowa/scheme.rb; switch to it once confirmed that #fetch returns an
# object exposing #plain.
text = Sosowa.get.sample.fetch.text.gsub(/(<br>|\r?\n)/, "")
mecab.parseToNode(text).each do |token|
  puts token.feature
end
data/sosowa.gemspec CHANGED
@@ -5,7 +5,7 @@ Gem::Specification.new do |gem|
5
5
  gem.authors = ["Oame"]
6
6
  gem.email = ["oame@oameya.com"]
7
7
  gem.description = %q{Sosowa Parser for Ruby}
8
- gem.summary = %q{Sosowa Parser for Ruby.}
8
+ gem.summary = %q{Sosowa Parser for Ruby 1.9.x.}
9
9
  gem.homepage = ""
10
10
 
11
11
  gem.files = `git ls-files`.split($\)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sosowa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: '0.1'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-09 00:00:00.000000000Z
12
+ date: 2012-07-10 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
16
- requirement: &70129465130960 !ruby/object:Gem::Requirement
16
+ requirement: &70307433904340 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70129465130960
24
+ version_requirements: *70307433904340
25
25
  description: Sosowa Parser for Ruby
26
26
  email:
27
27
  - oame@oameya.com
@@ -30,6 +30,7 @@ extensions: []
30
30
  extra_rdoc_files: []
31
31
  files:
32
32
  - .gitignore
33
+ - CHANGELOG.md
33
34
  - Gemfile
34
35
  - LICENSE
35
36
  - README.md
@@ -38,6 +39,7 @@ files:
38
39
  - lib/sosowa/parser.rb
39
40
  - lib/sosowa/scheme.rb
40
41
  - lib/sosowa/version.rb
42
+ - samples/token_segment.rb
41
43
  - sosowa.gemspec
42
44
  homepage: ''
43
45
  licenses: []
@@ -62,6 +64,6 @@ rubyforge_project:
62
64
  rubygems_version: 1.8.10
63
65
  signing_key:
64
66
  specification_version: 3
65
- summary: Sosowa Parser for Ruby.
67
+ summary: Sosowa Parser for Ruby 1.9.x.
66
68
  test_files: []
67
69
  has_rdoc: