sosowa 0.0.2 → 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.md ADDED
@@ -0,0 +1,6 @@
1
+ ## 0.1
2
+ * Sosowa#searchを追加
3
+ * Sosowa::Novel#plainを使って<br>タグや改行コードが取り除かれたテキストを得ることが出来ます。
4
+
5
+ ## 0.0.2
6
+ * 最初のリリースです!
data/README.md CHANGED
@@ -2,6 +2,11 @@
2
2
 
3
3
  Sosowa Parser for Ruby 1.9.x
4
4
 
5
+ ## Requirements
6
+
7
+ * Ruby 1.9.x
8
+ * mechanize gem
9
+
5
10
  ## Installation
6
11
 
7
12
  gem install sosowa
@@ -17,6 +22,9 @@ Sosowa Parser for Ruby 1.9.x
17
22
  # 作品集番号156の1320873807を持ってくる
18
23
  novel = Sosowa.get(:log => 156, :key => 1320873807)
19
24
  puts novel.text
25
+
26
+ # "ナズーリン"がタイトルに含まれているSSの数を出力する
27
+ puts Sosowa.search("ナズーリン", :type => :title).size
20
28
 
21
29
  ## Contributing
22
30
 
data/lib/sosowa/parser.rb CHANGED
@@ -6,8 +6,18 @@ module Sosowa
6
6
  @agent.user_agent = "Sosowa Ruby #{Sosowa::VERSION}"
7
7
  end
8
8
 
9
+ def search(query, args={})
10
+ params = Sosowa.serialize_parameter({:mode => :search, :type => (args[:type] ? args[:type] : :insubject), :query => query.tosjis})
11
+ parse_index(URI.join(Sosowa::BASE_URL, params))
12
+ end
13
+
9
14
  def fetch_index(log)
10
- page = @agent.get("#{Sosowa::BASE_URL}/?log=#{log}")
15
+ params = Sosowa.serialize_parameter({:log => log})
16
+ parse_index(URI.join(Sosowa::BASE_URL, params))
17
+ end
18
+
19
+ def parse_index(url)
20
+ page = @agent.get(url)
11
21
  indexes = []
12
22
  tr = page.search("tr")
13
23
  tr = tr[1, tr.size-1]
@@ -19,6 +29,7 @@ module Sosowa
19
29
  else
20
30
  title = tr.search(%{td[@class="title cell_title"] > a}).inner_html.to_s.toutf8.strip
21
31
  tags = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["title"].value.split(" / ")
32
+ log = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["href"].value.gsub(/log=(\d+)$/, '\1').to_i
22
33
  key = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["href"].value.gsub(/^.+key=(.+?)&.+$/, '\1').to_i
23
34
  author = tr.search(%{td[@class="cell_author"]}).inner_html.to_s.toutf8.strip
24
35
  created_at = Time.parse(tr.search(%{td[@class="cell_created"]}).inner_html.to_s.toutf8.strip)
data/lib/sosowa/scheme.rb CHANGED
@@ -22,7 +22,8 @@ module Sosowa
22
22
  end
23
23
 
24
24
  def fetch(log, key)
25
- @page = @agent.get("#{Sosowa::BASE_URL}/?mode=read&log=#{log}&key=#{key}")
25
+ params = Sosowa.serialize_parameter({:mode => :read, :log => log, :key => key})
26
+ @page = @agent.get(URI.join(Sosowa::BASE_URL, params))
26
27
  tags = (@page/%{dl[@class="info"][1] > dd > a}).map{|t| t.inner_html.to_s.toutf8 }
27
28
  text = (@page/%{div[@class="contents ss"]})[0].inner_html.to_s.toutf8
28
29
  ps = (@page/%{div[@class="aft"]})[0].inner_html.to_s.toutf8
@@ -78,6 +79,10 @@ module Sosowa
78
79
  form.field_with(:name => "point").option_with(:value => (params[:point].to_s || "0")).select
79
80
  form.click_button
80
81
  end
82
+
83
+ def plain
84
+ return @element[:text].gsub(/(<br>|\r?\n)/, "")
85
+ end
81
86
  end
82
87
 
83
88
  class Comment < Scheme
data/lib/sosowa/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Sosowa
2
- VERSION = "0.0.2"
3
- end
2
+ VERSION = "0.1"
3
+ end
data/lib/sosowa.rb CHANGED
@@ -1,14 +1,27 @@
1
- $LOAD_PATH.unshift(File.expand_path("../", __FILE__))
2
1
  require "kconv"
3
2
  require "mechanize"
4
3
  require "time"
5
- require "cgi"
4
+ require "uri"
5
+
6
+ $LOAD_PATH.unshift(File.expand_path("../", __FILE__))
6
7
  require "sosowa/version"
7
8
  require "sosowa/scheme"
8
9
  require "sosowa/parser"
9
10
 
10
11
  module Sosowa
11
- BASE_URL = "http://coolier.sytes.net:8080/sosowa/ssw_l"
12
+ BASE_URL = "http://coolier.sytes.net:8080/sosowa/ssw_l/"
13
+
14
+ # @param [Hash] parameter
15
+ # @return [String] URL Serialized parameters
16
+ def self.serialize_parameter parameter
17
+ return "" unless parameter.class == Hash
18
+ ant = Hash.new
19
+ parameter.each do |key, value|
20
+ ant[key.to_sym] = value.to_s
21
+ end
22
+ param = ant.inject(""){|k,v|k+"&#{v[0]}=#{URI.escape(v[1])}"}.sub!(/^&/,"?")
23
+ return param ? param : ""
24
+ end
12
25
 
13
26
  def self.get(args={})
14
27
  args[:log] ||= 0
@@ -19,4 +32,9 @@ module Sosowa
19
32
  parser.fetch_index(args[:log])
20
33
  end
21
34
  end
35
+
36
+ def self.search(query, args={})
37
+ parser = Parser.new
38
+ parser.search(query, args)
39
+ end
22
40
  end
data/samples/token_segment.rb ADDED
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+ # coding: utf-8
3
+ # 創想話の最新版から適当なSSを取得してMeCab(+ 東方MeCab辞書)を用いてトークナイズします。
4
+
5
+ require "MeCab"
6
+ require "sosowa"
7
+
8
+ module MeCab
9
+ class Tagger
10
+ alias_method :parseToNode_org, :parseToNode
11
+ private :parseToNode_org
12
+
13
+ def parseToNode(*args)
14
+ node = parseToNode_org(*args)
15
+ nodes = []
16
+ while node
17
+ nodes.push(node)
18
+ node = node.next
19
+ end
20
+ return nodes[1, nodes.size - 2]
21
+ end
22
+ end
23
+ end
24
+
25
+ puts "Fetching thdic-mecab..."
26
+ system("curl -L https://github.com/oame/thdic-mecab/raw/master/pkg/thdic-mecab.dic > thdic-mecab.dic")
27
+
28
+ puts "Done. Initialize MeCab::Tagger"
29
+ mecab = MeCab::Tagger.new("-u thdic-mecab.dic")
30
+
31
+ text = Sosowa.get.sample.fetch.text.gsub(/(<br>|\r?\n)/, "")
32
+ tokens = mecab.parseToNode(text)
33
+ tokens.each do |token|
34
+ puts token.feature
35
+ end
data/sosowa.gemspec CHANGED
@@ -5,7 +5,7 @@ Gem::Specification.new do |gem|
5
5
  gem.authors = ["Oame"]
6
6
  gem.email = ["oame@oameya.com"]
7
7
  gem.description = %q{Sosowa Parser for Ruby}
8
- gem.summary = %q{Sosowa Parser for Ruby.}
8
+ gem.summary = %q{Sosowa Parser for Ruby 1.9.x.}
9
9
  gem.homepage = ""
10
10
 
11
11
  gem.files = `git ls-files`.split($\)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sosowa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: '0.1'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-09 00:00:00.000000000Z
12
+ date: 2012-07-10 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
16
- requirement: &70129465130960 !ruby/object:Gem::Requirement
16
+ requirement: &70307433904340 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70129465130960
24
+ version_requirements: *70307433904340
25
25
  description: Sosowa Parser for Ruby
26
26
  email:
27
27
  - oame@oameya.com
@@ -30,6 +30,7 @@ extensions: []
30
30
  extra_rdoc_files: []
31
31
  files:
32
32
  - .gitignore
33
+ - CHANGELOG.md
33
34
  - Gemfile
34
35
  - LICENSE
35
36
  - README.md
@@ -38,6 +39,7 @@ files:
38
39
  - lib/sosowa/parser.rb
39
40
  - lib/sosowa/scheme.rb
40
41
  - lib/sosowa/version.rb
42
+ - samples/token_segment.rb
41
43
  - sosowa.gemspec
42
44
  homepage: ''
43
45
  licenses: []
@@ -62,6 +64,6 @@ rubyforge_project:
62
64
  rubygems_version: 1.8.10
63
65
  signing_key:
64
66
  specification_version: 3
65
- summary: Sosowa Parser for Ruby.
67
+ summary: Sosowa Parser for Ruby 1.9.x.
66
68
  test_files: []
67
69
  has_rdoc: