sosowa 0.0.2 → 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +6 -0
- data/README.md +8 -0
- data/lib/sosowa/parser.rb +12 -1
- data/lib/sosowa/scheme.rb +6 -1
- data/lib/sosowa/version.rb +2 -2
- data/lib/sosowa.rb +21 -3
- data/samples/token_segment.rb +35 -0
- data/sosowa.gemspec +1 -1
- metadata +7 -5
    
        data/CHANGELOG.md
    ADDED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -2,6 +2,11 @@ | |
| 2 2 |  | 
| 3 3 | 
             
            Sosowa Parser for Ruby 1.9.x
         | 
| 4 4 |  | 
| 5 | 
            +
            ## Requirements
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            * Ruby 1.9.x
         | 
| 8 | 
            +
            * mechanize gem
         | 
| 9 | 
            +
             | 
| 5 10 | 
             
            ## Installation
         | 
| 6 11 |  | 
| 7 12 | 
             
            	gem install sosowa
         | 
| @@ -17,6 +22,9 @@ Sosowa Parser for Ruby 1.9.x | |
| 17 22 | 
             
            	# 作品集番号156の1320873807を持ってくる
         | 
| 18 23 | 
             
            	novel = Sosowa.get(:log => 156, :key => 1320873807)
         | 
| 19 24 | 
             
            	puts novel.text
         | 
| 25 | 
            +
            	
         | 
| 26 | 
            +
            	# "ナズーリン"がタイトルに含まれているSSの数を出力する
         | 
| 27 | 
            +
            	puts Sosowa.search("ナズーリン", :type => :title).size
         | 
| 20 28 |  | 
| 21 29 | 
             
            ## Contributing
         | 
| 22 30 |  | 
    
        data/lib/sosowa/parser.rb
    CHANGED
    
    | @@ -6,8 +6,18 @@ module Sosowa | |
| 6 6 | 
             
                  @agent.user_agent = "Sosowa Ruby #{Sosowa::VERSION}"
         | 
| 7 7 | 
             
                end
         | 
| 8 8 |  | 
| 9 | 
            +
                def search(query, args={})
         | 
| 10 | 
            +
                  params = Sosowa.serialize_parameter({:mode => :search, :type => (args[:type] ? args[:type] : :insubject), :query => query.tosjis})
         | 
| 11 | 
            +
                  parse_index(URI.join(Sosowa::BASE_URL, params))
         | 
| 12 | 
            +
                end
         | 
| 13 | 
            +
                
         | 
| 9 14 | 
             
                def fetch_index(log)
         | 
| 10 | 
            -
                   | 
| 15 | 
            +
                  params = Sosowa.serialize_parameter({:log => log})
         | 
| 16 | 
            +
                  parse_index(URI.join(Sosowa::BASE_URL, params))
         | 
| 17 | 
            +
                end
         | 
| 18 | 
            +
                
         | 
| 19 | 
            +
                def parse_index(url)
         | 
| 20 | 
            +
                  page = @agent.get(url)
         | 
| 11 21 | 
             
                  indexes = []
         | 
| 12 22 | 
             
                  tr = page.search("tr")
         | 
| 13 23 | 
             
                  tr = tr[1, tr.size-1]
         | 
| @@ -19,6 +29,7 @@ module Sosowa | |
| 19 29 | 
             
                    else
         | 
| 20 30 | 
             
                      title = tr.search(%{td[@class="title cell_title"] > a}).inner_html.to_s.toutf8.strip
         | 
| 21 31 | 
             
                      tags = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["title"].value.split(" / ")
         | 
| 32 | 
            +
                      log = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["href"].value.gsub(/log=(\d+)$/, '\1').to_i
         | 
| 22 33 | 
             
                      key = tr.search(%{td[@class="title cell_title"] > a})[0].attributes["href"].value.gsub(/^.+key=(.+?)&.+$/, '\1').to_i
         | 
| 23 34 | 
             
                      author = tr.search(%{td[@class="cell_author"]}).inner_html.to_s.toutf8.strip
         | 
| 24 35 | 
             
                      created_at = Time.parse(tr.search(%{td[@class="cell_created"]}).inner_html.to_s.toutf8.strip)
         | 
    
        data/lib/sosowa/scheme.rb
    CHANGED
    
    | @@ -22,7 +22,8 @@ module Sosowa | |
| 22 22 | 
             
                end
         | 
| 23 23 |  | 
| 24 24 | 
             
                def fetch(log, key)
         | 
| 25 | 
            -
                   | 
| 25 | 
            +
                  params = Sosowa.serialize_parameter({:mode => :read, :log => log, :key => key})
         | 
| 26 | 
            +
                  @page = @agent.get(URI.join(Sosowa::BASE_URL, params))
         | 
| 26 27 | 
             
                  tags = (@page/%{dl[@class="info"][1] > dd > a}).map{|t| t.inner_html.to_s.toutf8 }
         | 
| 27 28 | 
             
                  text = (@page/%{div[@class="contents ss"]})[0].inner_html.to_s.toutf8
         | 
| 28 29 | 
             
                  ps = (@page/%{div[@class="aft"]})[0].inner_html.to_s.toutf8
         | 
| @@ -78,6 +79,10 @@ module Sosowa | |
| 78 79 | 
             
                  form.field_with(:name => "point").option_with(:value => (params[:point].to_s || "0")).select
         | 
| 79 80 | 
             
                  form.click_button
         | 
| 80 81 | 
             
                end
         | 
| 82 | 
            +
                
         | 
| 83 | 
            +
                def plain
         | 
| 84 | 
            +
                  return @element[:text].gsub(/(<br>|\r?\n)/, "")
         | 
| 85 | 
            +
                end
         | 
| 81 86 | 
             
              end
         | 
| 82 87 |  | 
| 83 88 | 
             
              class Comment < Scheme
         | 
    
        data/lib/sosowa/version.rb
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 1 | 
             
            module Sosowa
         | 
| 2 | 
            -
              VERSION = "0.0.2" | 
| 3 | 
            -
            end
         | 
| 2 | 
            +
              VERSION = "0.1"
         | 
| 3 | 
            +
            end
         | 
    
        data/lib/sosowa.rb
    CHANGED
    
    | @@ -1,14 +1,27 @@ | |
| 1 | 
            -
            $LOAD_PATH.unshift(File.expand_path("../", __FILE__))
         | 
| 2 1 | 
             
            require "kconv"
         | 
| 3 2 | 
             
            require "mechanize"
         | 
| 4 3 | 
             
            require "time"
         | 
| 5 | 
            -
            require " | 
| 4 | 
            +
            require "uri"
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            $LOAD_PATH.unshift(File.expand_path("../", __FILE__))
         | 
| 6 7 | 
             
            require "sosowa/version"
         | 
| 7 8 | 
             
            require "sosowa/scheme"
         | 
| 8 9 | 
             
            require "sosowa/parser"
         | 
| 9 10 |  | 
| 10 11 | 
             
            module Sosowa
         | 
| 11 | 
            -
              BASE_URL = "http://coolier.sytes.net:8080/sosowa/ssw_l"
         | 
| 12 | 
            +
              BASE_URL = "http://coolier.sytes.net:8080/sosowa/ssw_l/"
         | 
| 13 | 
            +
              
         | 
| 14 | 
            +
              # @param [Hash] parameter
         | 
| 15 | 
            +
              # @return [String] URL Serialized parameters
         | 
| 16 | 
            +
              def self.serialize_parameter parameter
         | 
| 17 | 
            +
                return "" unless parameter.class == Hash
         | 
| 18 | 
            +
                ant = Hash.new
         | 
| 19 | 
            +
                parameter.each do |key, value|
         | 
| 20 | 
            +
                  ant[key.to_sym] = value.to_s
         | 
| 21 | 
            +
                end
         | 
| 22 | 
            +
                param = ant.inject(""){|k,v|k+"&#{v[0]}=#{URI.escape(v[1])}"}.sub!(/^&/,"?")
         | 
| 23 | 
            +
                return param ? param : ""
         | 
| 24 | 
            +
              end
         | 
| 12 25 |  | 
| 13 26 | 
             
              def self.get(args={})
         | 
| 14 27 | 
             
                args[:log] ||= 0
         | 
| @@ -19,4 +32,9 @@ module Sosowa | |
| 19 32 | 
             
                  parser.fetch_index(args[:log])
         | 
| 20 33 | 
             
                end
         | 
| 21 34 | 
             
              end
         | 
| 35 | 
            +
              
         | 
| 36 | 
            +
              def self.search(query, args={})
         | 
| 37 | 
            +
                parser = Parser.new
         | 
| 38 | 
            +
                parser.search(query, args)
         | 
| 39 | 
            +
              end
         | 
| 22 40 | 
             
            end
         | 
| @@ -0,0 +1,35 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
            # coding: utf-8
         | 
| 3 | 
            +
            # 創想話の最新版から適当なSSを取得してMeCab(+ 東方MeCab辞書)を用いてトークナイズします。
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            require "MeCab"
         | 
| 6 | 
            +
            require "sosowa"
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            module MeCab
         | 
| 9 | 
            +
              class Tagger
         | 
| 10 | 
            +
                alias_method :parseToNode_org, :parseToNode
         | 
| 11 | 
            +
                private :parseToNode_org
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                def parseToNode(*args)
         | 
| 14 | 
            +
                  node = parseToNode_org(*args)
         | 
| 15 | 
            +
                  nodes = []
         | 
| 16 | 
            +
                  while node
         | 
| 17 | 
            +
                    nodes.push(node)
         | 
| 18 | 
            +
                    node = node.next
         | 
| 19 | 
            +
                  end
         | 
| 20 | 
            +
                  return nodes[1, nodes.size - 2]
         | 
| 21 | 
            +
                end
         | 
| 22 | 
            +
              end
         | 
| 23 | 
            +
            end
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            puts "Fetching thdic-mecab..."
         | 
| 26 | 
            +
            system("curl -L https://github.com/oame/thdic-mecab/raw/master/pkg/thdic-mecab.dic > thdic-mecab.dic")
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            puts "Done. Initialize MeCab::Tagger"
         | 
| 29 | 
            +
            mecab = MeCab::Tagger.new("-u thdic-mecab.dic")
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            text = Sosowa.get.sample.fetch.text.gsub(/(<br>|\r?\n)/, "")
         | 
| 32 | 
            +
            tokens = mecab.parseToNode(text)
         | 
| 33 | 
            +
            tokens.each do |token|
         | 
| 34 | 
            +
              puts token.feature
         | 
| 35 | 
            +
            end
         | 
    
        data/sosowa.gemspec
    CHANGED
    
    | @@ -5,7 +5,7 @@ Gem::Specification.new do |gem| | |
| 5 5 | 
             
              gem.authors       = ["Oame"]
         | 
| 6 6 | 
             
              gem.email         = ["oame@oameya.com"]
         | 
| 7 7 | 
             
              gem.description   = %q{Sosowa Parser for Ruby}
         | 
| 8 | 
            -
              gem.summary       = %q{Sosowa Parser for Ruby.}
         | 
| 8 | 
            +
              gem.summary       = %q{Sosowa Parser for Ruby 1.9.x.}
         | 
| 9 9 | 
             
              gem.homepage      = ""
         | 
| 10 10 |  | 
| 11 11 | 
             
              gem.files         = `git ls-files`.split($\)
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: sosowa
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0.2 | 
| 4 | 
            +
              version: '0.1'
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -9,11 +9,11 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date: 2012-07- | 
| 12 | 
            +
            date: 2012-07-10 00:00:00.000000000Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: mechanize
         | 
| 16 | 
            -
              requirement: & | 
| 16 | 
            +
              requirement: &70307433904340 !ruby/object:Gem::Requirement
         | 
| 17 17 | 
             
                none: false
         | 
| 18 18 | 
             
                requirements:
         | 
| 19 19 | 
             
                - - ! '>='
         | 
| @@ -21,7 +21,7 @@ dependencies: | |
| 21 21 | 
             
                    version: '0'
         | 
| 22 22 | 
             
              type: :runtime
         | 
| 23 23 | 
             
              prerelease: false
         | 
| 24 | 
            -
              version_requirements: * | 
| 24 | 
            +
              version_requirements: *70307433904340
         | 
| 25 25 | 
             
            description: Sosowa Parser for Ruby
         | 
| 26 26 | 
             
            email:
         | 
| 27 27 | 
             
            - oame@oameya.com
         | 
| @@ -30,6 +30,7 @@ extensions: [] | |
| 30 30 | 
             
            extra_rdoc_files: []
         | 
| 31 31 | 
             
            files:
         | 
| 32 32 | 
             
            - .gitignore
         | 
| 33 | 
            +
            - CHANGELOG.md
         | 
| 33 34 | 
             
            - Gemfile
         | 
| 34 35 | 
             
            - LICENSE
         | 
| 35 36 | 
             
            - README.md
         | 
| @@ -38,6 +39,7 @@ files: | |
| 38 39 | 
             
            - lib/sosowa/parser.rb
         | 
| 39 40 | 
             
            - lib/sosowa/scheme.rb
         | 
| 40 41 | 
             
            - lib/sosowa/version.rb
         | 
| 42 | 
            +
            - samples/token_segment.rb
         | 
| 41 43 | 
             
            - sosowa.gemspec
         | 
| 42 44 | 
             
            homepage: ''
         | 
| 43 45 | 
             
            licenses: []
         | 
| @@ -62,6 +64,6 @@ rubyforge_project: | |
| 62 64 | 
             
            rubygems_version: 1.8.10
         | 
| 63 65 | 
             
            signing_key: 
         | 
| 64 66 | 
             
            specification_version: 3
         | 
| 65 | 
            -
            summary: Sosowa Parser for Ruby.
         | 
| 67 | 
            +
            summary: Sosowa Parser for Ruby 1.9.x.
         | 
| 66 68 | 
             
            test_files: []
         | 
| 67 69 | 
             
            has_rdoc: 
         |