anystyle-parser 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +28 -12
- data/HISTORY.md +6 -0
- data/LICENSE +2 -2
- data/README.md +11 -11
- data/Rakefile +14 -3
- data/anystyle-parser.gemspec +13 -8
- data/features/support/env.rb +18 -0
- data/lib/anystyle/parser/dictionary.rb +35 -37
- data/lib/anystyle/parser/errors.rb +18 -18
- data/lib/anystyle/parser/parser.rb +254 -244
- data/lib/anystyle/parser/utility.rb +18 -18
- data/lib/anystyle/parser/version.rb +1 -1
- data/spec/anystyle/parser/parser_spec.rb +119 -115
- data/spec/spec_helper.rb +9 -2
- metadata +26 -43
- data/.autotest +0 -0
- data/.gitignore +0 -5
- data/.rspec +0 -3
    
        checksums.yaml
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            SHA1:
         | 
| 3 | 
            +
              metadata.gz: 9723d7ea8fd46588c2487a72357c29fc9b1811ce
         | 
| 4 | 
            +
              data.tar.gz: bfe339c9e7ab8883cbef7d8ad4de1a6aba433b53
         | 
| 5 | 
            +
            SHA512:
         | 
| 6 | 
            +
              metadata.gz: 18591aa6d5ab49057b57308ff4a38b0e99aa07b45acf244998b47e51653b892ca442a5def2f3ed547f74c4cf338355070749cd31ca44dec08d41d21b2f23912b
         | 
| 7 | 
            +
              data.tar.gz: 2790d3bd5f4fa9a86aae3be03b3dbb9847816911a173186c898def47f28c243f3bf12ccdedb34e914bd5549f3bdaeed24fcc47c014b628f71b96dacc0d39846e
         | 
    
        data/Gemfile
    CHANGED
    
    | @@ -1,19 +1,19 @@ | |
| 1 1 | 
             
            source 'https://rubygems.org'
         | 
| 2 2 | 
             
            gemspec
         | 
| 3 3 |  | 
| 4 | 
            -
            group :development do
         | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 4 | 
            +
            group :development, :test do
         | 
| 5 | 
            +
              gem 'rake'
         | 
| 6 | 
            +
              gem 'cucumber'
         | 
| 7 | 
            +
              gem 'rspec'
         | 
| 8 | 
            +
              gem 'simplecov', '~>0.8', :require => false
         | 
| 9 | 
            +
              gem 'rubinius-coverage', :platform => :rbx
         | 
| 10 | 
            +
              gem 'coveralls', :require => false
         | 
| 8 11 | 
             
            end
         | 
| 9 12 |  | 
| 10 | 
            -
            group : | 
| 11 | 
            -
             | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 14 | 
            -
            	gem 'cucumber'
         | 
| 15 | 
            -
            	gem 'rspec'
         | 
| 16 | 
            -
            	gem 'ZenTest'
         | 
| 13 | 
            +
            group :debug do
         | 
| 14 | 
            +
              gem 'debugger', '~>1.6', :require => false, :platform => :mri
         | 
| 15 | 
            +
              gem 'rubinius-compiler', '~>2.0', :require => false, :platform => :rbx
         | 
| 16 | 
            +
              gem 'rubinius-debugger', '~>2.0', :require => false, :platform => :rbx
         | 
| 17 17 | 
             
            end
         | 
| 18 18 |  | 
| 19 19 | 
             
            group :profile do
         | 
| @@ -22,6 +22,22 @@ group :profile do | |
| 22 22 | 
             
            end
         | 
| 23 23 |  | 
| 24 24 | 
             
            group :extra do
         | 
| 25 | 
            +
            	gem 'autotest-fsevent', :require => false
         | 
| 26 | 
            +
              gem 'yard'
         | 
| 27 | 
            +
            	gem 'ZenTest'
         | 
| 28 | 
            +
            end
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            group :redis do
         | 
| 31 | 
            +
              gem 'redis'
         | 
| 32 | 
            +
              gem 'hiredis'
         | 
| 33 | 
            +
            end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            group :kyoto do
         | 
| 25 36 | 
             
            	gem 'kyotocabinet-ruby', :require => 'kyotocabinet'
         | 
| 26 | 
            -
             | 
| 37 | 
            +
            end
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            platform :rbx do
         | 
| 40 | 
            +
              gem 'rubysl', '~>2.0'
         | 
| 41 | 
            +
              gem 'json', '~>1.8'
         | 
| 42 | 
            +
              gem 'racc'
         | 
| 27 43 | 
             
            end
         | 
    
        data/HISTORY.md
    CHANGED
    
    
    
        data/LICENSE
    CHANGED
    
    | @@ -1,4 +1,4 @@ | |
| 1 | 
            -
            Copyright 2011- | 
| 1 | 
            +
            Copyright 2011-2014 Sylvester Keil. All rights reserved.
         | 
| 2 2 |  | 
| 3 3 | 
             
            Redistribution and use in source and binary forms, with or without
         | 
| 4 4 | 
             
            modification, are permitted provided that the following conditions are met:
         | 
| @@ -23,4 +23,4 @@ EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 23 23 |  | 
| 24 24 | 
             
            The views and conclusions contained in the software and documentation are
         | 
| 25 25 | 
             
            those of the authors and should not be interpreted as representing official
         | 
| 26 | 
            -
            policies, either expressed or implied, of the copyright holder.
         | 
| 26 | 
            +
            policies, either expressed or implied, of the copyright holder.
         | 
    
        data/README.md
    CHANGED
    
    | @@ -1,5 +1,7 @@ | |
| 1 1 | 
             
            Anystyle-Parser
         | 
| 2 2 | 
             
            ===============
         | 
| 3 | 
            +
            [](https://travis-ci.org/inukshuk/anystyle-parser)
         | 
| 4 | 
            +
            [](https://coveralls.io/r/inukshuk/anystyle-parser)
         | 
| 3 5 |  | 
| 4 6 | 
             
            Anystyle-Parser is a very fast and smart parser for academic references. It
         | 
| 5 7 | 
             
            is inspired by [ParsCit](http://aye.comp.nus.edu.sg/parsCit/) and
         | 
| @@ -18,7 +20,7 @@ Installation | |
| 18 20 | 
             
                $ [sudo] gem install anystyle-parser
         | 
| 19 21 |  | 
| 20 22 | 
             
            During the statistical analysis of reference strings, Anystyle-Parser relies
         | 
| 21 | 
            -
            on a large feature dictionary; by default, Anystyle-Parser creates a | 
| 23 | 
            +
            on a large feature dictionary; by default, Anystyle-Parser creates a
         | 
| 22 24 | 
             
            [Kyoto Cabinet](http://fallabs.com/kyotocabinet/) file-based hash database
         | 
| 23 25 | 
             
            from the dictionary file that ships with the parser. If Kyoto Cabinet is
         | 
| 24 26 | 
             
            not installed on your system, Anystyle-Parser uses a simple Ruby Hash as a
         | 
| @@ -26,7 +28,7 @@ fall-back; this Hash has to be re-created every time you load the parser | |
| 26 28 | 
             
            and takes up a lot of memory in your Ruby process; it is therefore strongly
         | 
| 27 29 | 
             
            recommended to install Kyoto Cabinet and the `kyotocabinet-ruby` gem.
         | 
| 28 30 |  | 
| 29 | 
            -
                $ [sudo] gem install kyotocabinet-ruby | 
| 31 | 
            +
                $ [sudo] gem install kyotocabinet-ruby
         | 
| 30 32 |  | 
| 31 33 | 
             
            The database file will be created the first time you access the dictionary;
         | 
| 32 34 | 
             
            note that you will need write permissions in the directory where the file
         | 
| @@ -39,8 +41,8 @@ Starting with version 0.1.0, Anystyle-Parser also supports | |
| 39 41 | 
             
            [Redis](http://redis.io); to use Redis as the data store you need to install
         | 
| 40 42 | 
             
            the `redis` gem (and, optionally, the `hiredis` gem).
         | 
| 41 43 |  | 
| 42 | 
            -
                $ [sudo] gem install hiredis | 
| 43 | 
            -
                $ [sudo] gem install redis | 
| 44 | 
            +
                $ [sudo] gem install hiredis
         | 
| 45 | 
            +
                $ [sudo] gem install redis
         | 
| 44 46 |  | 
| 45 47 | 
             
            To see which data store modes are available in you current environment,
         | 
| 46 48 | 
             
            check the output of `Dictionary.modes`:
         | 
| @@ -52,7 +54,7 @@ To select one of the available modes, use the dictionary instance options: | |
| 52 54 |  | 
| 53 55 | 
             
                > Anystyle::Parser::Dictionary.instance.options[:mode]
         | 
| 54 56 | 
             
                => :kyoto
         | 
| 55 | 
            -
             | 
| 57 | 
            +
             | 
| 56 58 | 
             
            To use [Redis](http://redis.io) you also need to set the host or unix socket
         | 
| 57 59 | 
             
            where your redis server is available. For example:
         | 
| 58 60 |  | 
| @@ -84,7 +86,7 @@ Anystyle-Parser are `#parse` and `#train` that both accept two arguments. | |
| 84 86 | 
             
            `#parse` parses the passed-in input (either a filename, your reference strings,
         | 
| 85 87 | 
             
            or an array of your reference strings) and returns the parsed data in the
         | 
| 86 88 | 
             
            format specified as the second argument (supported formats include: *:hash*,
         | 
| 87 | 
            -
            *:bibtex*, *:citeproc*, and *: | 
| 89 | 
            +
            *:bibtex*, *:citeproc*, *:tags*, and *:raw*).
         | 
| 88 90 |  | 
| 89 91 | 
             
            `#train` allows you to easily train the Parser's CRF model. The first argument
         | 
| 90 92 | 
             
            is either a filename or your data as a string; the format of training data
         | 
| @@ -113,7 +115,7 @@ The following irb sessions illustrates some parser goodness: | |
| 113 115 | 
             
                  year = {1997}
         | 
| 114 116 | 
             
                }
         | 
| 115 117 | 
             
                => nil
         | 
| 116 | 
            -
             | 
| 118 | 
            +
             | 
| 117 119 | 
             
            ### Unhappy with the results?
         | 
| 118 120 |  | 
| 119 121 | 
             
            Citation references come in many forms, so, inevitably, you will find data
         | 
| @@ -159,11 +161,10 @@ data again: | |
| 159 161 | 
             
                => [{:author=>"John Lafferty and Andrew McCallum and Fernando Pereira", :title=>"Conditional random fields: probabilistic models for segmenting and labeling sequence data", :booktitle=>"Proceedings of the International Conference on Machine Learning", :pages=>"282--289", :publisher=>"Morgan Kaufmann", :location=>"San Francisco, CA", :year=>2001, :type=>:inproceedings}]
         | 
| 160 162 |  | 
| 161 163 | 
             
            If you want to make Anystyle-Parser smarter, please consider sending us your
         | 
| 162 | 
            -
            tagged references (see below). | 
| 164 | 
            +
            tagged references (see below).
         | 
| 163 165 |  | 
| 164 166 | 
             
            Contributing
         | 
| 165 167 | 
             
            ------------
         | 
| 166 | 
            -
             | 
| 167 168 | 
             
            The Anystyle-Parser source code is
         | 
| 168 169 | 
             
            [hosted on GitHub](http://github.com/inukshuk/anystyle-parser/).
         | 
| 169 170 | 
             
            You can check out a copy of the latest code using Git:
         | 
| @@ -182,8 +183,7 @@ and open a pull request on GitHub. | |
| 182 183 |  | 
| 183 184 | 
             
            License
         | 
| 184 185 | 
             
            -------
         | 
| 185 | 
            -
             | 
| 186 | 
            -
            Copyright 2011-2012 Sylvester Keil. All rights reserved.
         | 
| 186 | 
            +
            Copyright 2011-2014 Sylvester Keil. All rights reserved.
         | 
| 187 187 |  | 
| 188 188 | 
             
            Some of the code in Anystyle-Parser's post processing (normalizing) routines
         | 
| 189 189 | 
             
            was originally based on the source code of FreeCite and
         | 
    
        data/Rakefile
    CHANGED
    
    | @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            require 'bundler'
         | 
| 2 2 | 
             
            begin
         | 
| 3 | 
            -
              Bundler.setup | 
| 3 | 
            +
              Bundler.setup
         | 
| 4 4 | 
             
            rescue Bundler::BundlerError => e
         | 
| 5 5 | 
             
              $stderr.puts e.message
         | 
| 6 6 | 
             
              $stderr.puts "Run `bundle install` to install missing gems"
         | 
| @@ -23,6 +23,13 @@ task :release => [:build] do | |
| 23 23 | 
             
              system "gem push anystyle-parser-#{Anystyle::Parser::VERSION}.gem"
         | 
| 24 24 | 
             
            end
         | 
| 25 25 |  | 
| 26 | 
            +
            task :check_warnings do
         | 
| 27 | 
            +
              $VERBOSE = true
         | 
| 28 | 
            +
              require 'anystyle/parser'
         | 
| 29 | 
            +
             | 
| 30 | 
            +
              puts Anystyle::Parser::VERSION
         | 
| 31 | 
            +
            end
         | 
| 32 | 
            +
             | 
| 26 33 | 
             
            require 'rspec/core'
         | 
| 27 34 | 
             
            require 'rspec/core/rake_task'
         | 
| 28 35 | 
             
            RSpec::Core::RakeTask.new(:spec) do |spec|
         | 
| @@ -32,6 +39,10 @@ end | |
| 32 39 | 
             
            require 'cucumber/rake/task'
         | 
| 33 40 | 
             
            Cucumber::Rake::Task.new(:features)
         | 
| 34 41 |  | 
| 42 | 
            +
            require 'coveralls/rake/task'
         | 
| 43 | 
            +
            Coveralls::RakeTask.new
         | 
| 44 | 
            +
            task :test_with_coveralls => [:spec, 'coveralls:push']
         | 
| 45 | 
            +
             | 
| 35 46 | 
             
            task :default => :spec
         | 
| 36 47 |  | 
| 37 48 | 
             
            begin
         | 
| @@ -41,13 +52,13 @@ rescue LoadError | |
| 41 52 | 
             
              # ignore
         | 
| 42 53 | 
             
            end
         | 
| 43 54 |  | 
| 44 | 
            -
            desc 'Run an IRB session with  | 
| 55 | 
            +
            desc 'Run an IRB session with Anystyle-Parser loaded'
         | 
| 45 56 | 
             
            task :console, [:script] do |t, args|
         | 
| 46 57 | 
             
              ARGV.clear
         | 
| 47 58 |  | 
| 48 59 | 
             
              require 'irb'
         | 
| 49 60 | 
             
              require 'anystyle/parser'
         | 
| 50 | 
            -
             | 
| 61 | 
            +
             | 
| 51 62 | 
             
              IRB.conf[:SCRIPT] = args.script
         | 
| 52 63 | 
             
              IRB.start
         | 
| 53 64 | 
             
            end
         | 
    
        data/anystyle-parser.gemspec
    CHANGED
    
    | @@ -11,22 +11,27 @@ Gem::Specification.new do |s| | |
| 11 11 | 
             
              s.authors     = ['Sylvester Keil']
         | 
| 12 12 | 
             
              s.email       = ['http://sylvester.keil.or.at']
         | 
| 13 13 | 
             
              s.homepage    = 'http://github.com/inukshuk/anystyle-parser'
         | 
| 14 | 
            -
              s.summary     = ' | 
| 15 | 
            -
              s.description = 'A sophisticated parser for academic  | 
| 14 | 
            +
              s.summary     = 'Smart and fast academic bibliography parser.'
         | 
| 15 | 
            +
              s.description = 'A sophisticated parser for academic reference lists and bibliographies based on machine learning algorithms using conditional random fields.'
         | 
| 16 16 | 
             
              s.license     = 'FreeBSD'
         | 
| 17 | 
            -
             | 
| 18 | 
            -
              s. | 
| 17 | 
            +
             | 
| 18 | 
            +
              s.required_ruby_version = '>= 1.9.3'
         | 
| 19 | 
            +
             | 
| 20 | 
            +
              s.add_runtime_dependency('bibtex-ruby', '~>3.0')
         | 
| 19 21 | 
             
              s.add_runtime_dependency('wapiti', '~>0.0')
         | 
| 20 | 
            -
              s.add_runtime_dependency('namae', '~>0. | 
| 22 | 
            +
              s.add_runtime_dependency('namae', '~>0.8')
         | 
| 23 | 
            +
             | 
| 24 | 
            +
              s.files        = `git ls-files`.split("\n").reject { |path|
         | 
| 25 | 
            +
                path.start_with?('.')
         | 
| 26 | 
            +
              } - Dir['resources/**/*']
         | 
| 21 27 |  | 
| 22 | 
            -
              s.files        = `git ls-files`.split("\n") - Dir['resources/**/*']
         | 
| 23 28 | 
             
              s.test_files   = `git ls-files -- {test,spec,features}/*`.split("\n")
         | 
| 24 29 | 
             
              s.executables  = []
         | 
| 25 30 | 
             
              s.require_path = 'lib'
         | 
| 26 31 |  | 
| 27 32 | 
             
              s.rdoc_options      = %w{--line-numbers --inline-source --title "Anystyle\ Parser" --main README.md}
         | 
| 28 33 | 
             
              s.extra_rdoc_files  = %w{README.md LICENSE}
         | 
| 29 | 
            -
             | 
| 34 | 
            +
             | 
| 30 35 | 
             
            end
         | 
| 31 36 |  | 
| 32 | 
            -
            # vim: syntax=ruby
         | 
| 37 | 
            +
            # vim: syntax=ruby
         | 
    
        data/features/support/env.rb
    CHANGED
    
    | @@ -1 +1,19 @@ | |
| 1 | 
            +
            begin
         | 
| 2 | 
            +
              require 'simplecov'
         | 
| 3 | 
            +
              require 'coveralls' if ENV['CI']
         | 
| 4 | 
            +
            rescue LoadError
         | 
| 5 | 
            +
              # ignore
         | 
| 6 | 
            +
            end
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            begin
         | 
| 9 | 
            +
              case
         | 
| 10 | 
            +
              when defined?(RUBY_ENGINE) && RUBY_ENGINE == 'rbx'
         | 
| 11 | 
            +
                require 'rubinius/debugger'
         | 
| 12 | 
            +
              else
         | 
| 13 | 
            +
                require 'debugger'
         | 
| 14 | 
            +
              end
         | 
| 15 | 
            +
            rescue LoadError
         | 
| 16 | 
            +
              # ignore
         | 
| 17 | 
            +
            end
         | 
| 18 | 
            +
             | 
| 1 19 | 
             
            require 'anystyle/parser'
         | 
| @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            module Anystyle
         | 
| 2 2 | 
             
              module Parser
         | 
| 3 | 
            -
             | 
| 3 | 
            +
             | 
| 4 4 | 
             
                # Dictionary is a Singleton object that provides a key-value store of
         | 
| 5 5 | 
             
                # the Anystyle Parser dictionary required for feature elicitation.
         | 
| 6 6 | 
             
                # This dictionary acts essentially like a Ruby Hash object, but because
         | 
| @@ -11,8 +11,8 @@ module Anystyle | |
| 11 11 | 
             
                #
         | 
| 12 12 | 
             
                # Starting with version 0.1.0 Redis support was added. If you would
         | 
| 13 13 | 
             
                # like to use Redis as the dictionary data store you can do so by
         | 
| 14 | 
            -
                # 
         | 
| 15 | 
            -
                # | 
| 14 | 
            +
                # installing `redis' gem (and optionally the `hiredis' gem).
         | 
| 15 | 
            +
                #
         | 
| 16 16 | 
             
                # The database will be automatically created from the dictionary file
         | 
| 17 17 | 
             
                # using the best available DBM the first time it is accessed. Once
         | 
| 18 18 | 
             
                # database file exists, the database will be restored from file.
         | 
| @@ -46,7 +46,7 @@ module Anystyle | |
| 46 46 | 
             
                class Dictionary
         | 
| 47 47 |  | 
| 48 48 | 
             
                  include Singleton
         | 
| 49 | 
            -
             | 
| 49 | 
            +
             | 
| 50 50 | 
             
                  @keys = [:male, :female, :surname, :month, :place, :publisher, :journal].freeze
         | 
| 51 51 |  | 
| 52 52 | 
             
                  @code = Hash[*@keys.zip(0.upto(@keys.length-1).map { |i| 2**i }).flatten]
         | 
| @@ -67,42 +67,40 @@ module Anystyle | |
| 67 67 | 
             
                  rescue LoadError
         | 
| 68 68 | 
             
                    # info 'no redis support detected'
         | 
| 69 69 | 
             
                  end
         | 
| 70 | 
            -
             | 
| 70 | 
            +
             | 
| 71 71 | 
             
                  begin
         | 
| 72 72 | 
             
                    require 'kyotocabinet'
         | 
| 73 73 | 
             
                    @modes.unshift :kyoto
         | 
| 74 74 | 
             
                  rescue LoadError
         | 
| 75 75 | 
             
                    # info 'no kyoto-cabinet support detected'
         | 
| 76 76 | 
             
                  end
         | 
| 77 | 
            -
             | 
| 77 | 
            +
             | 
| 78 78 | 
             
                  @defaults = {
         | 
| 79 79 | 
             
                    :mode => @modes[0],
         | 
| 80 80 | 
             
                    :source => File.expand_path('../support/dict.txt.gz', __FILE__),
         | 
| 81 81 | 
             
                    :cabinet => File.expand_path('../support/dict.kch', __FILE__),
         | 
| 82 82 | 
             
                    :port => 6379
         | 
| 83 83 | 
             
                  }.freeze
         | 
| 84 | 
            -
             | 
| 85 | 
            -
             | 
| 84 | 
            +
             | 
| 85 | 
            +
             | 
| 86 86 | 
             
                  class << self
         | 
| 87 | 
            -
                    
         | 
| 88 87 | 
             
                    attr_reader :keys, :code, :defaults, :modes
         | 
| 89 | 
            -
                    
         | 
| 90 88 | 
             
                  end
         | 
| 91 89 |  | 
| 92 90 | 
             
                  attr_reader :options
         | 
| 93 | 
            -
             | 
| 91 | 
            +
             | 
| 94 92 | 
             
                  def initialize
         | 
| 95 93 | 
             
                    @options = Dictionary.defaults.dup
         | 
| 96 94 | 
             
                  end
         | 
| 97 | 
            -
             | 
| 95 | 
            +
             | 
| 98 96 | 
             
                  def [](key)
         | 
| 99 97 | 
             
                    db[key.to_s].to_i
         | 
| 100 98 | 
             
                  end
         | 
| 101 | 
            -
             | 
| 99 | 
            +
             | 
| 102 100 | 
             
                  def []=(key, value)
         | 
| 103 101 | 
             
                    db[key.to_s] = value
         | 
| 104 102 | 
             
                  end
         | 
| 105 | 
            -
             | 
| 103 | 
            +
             | 
| 106 104 | 
             
                  def create
         | 
| 107 105 | 
             
                    case options[:mode]
         | 
| 108 106 | 
             
                    when :kyoto
         | 
| @@ -113,61 +111,61 @@ module Anystyle | |
| 113 111 | 
             
                      end
         | 
| 114 112 | 
             
                      populate
         | 
| 115 113 | 
             
                      close
         | 
| 116 | 
            -
             | 
| 114 | 
            +
             | 
| 117 115 | 
             
                    when :redis
         | 
| 118 116 | 
             
                      @db ||= Redis.new(options)
         | 
| 119 117 | 
             
                      populate
         | 
| 120 118 | 
             
                      close
         | 
| 121 | 
            -
             | 
| 119 | 
            +
             | 
| 122 120 | 
             
                    else
         | 
| 123 121 | 
             
                      # nothing
         | 
| 124 122 | 
             
                    end
         | 
| 125 123 | 
             
                  end
         | 
| 126 | 
            -
             | 
| 124 | 
            +
             | 
| 127 125 | 
             
                  def truncate
         | 
| 128 126 | 
             
                    close
         | 
| 129 | 
            -
                    File.unlink(path) if File.exists?(path) | 
| 127 | 
            +
                    File.unlink(path) if File.exists?(path)
         | 
| 130 128 | 
             
                  end
         | 
| 131 | 
            -
             | 
| 129 | 
            +
             | 
| 132 130 | 
             
                  def open
         | 
| 133 131 | 
             
                    case options[:mode]
         | 
| 134 132 | 
             
                    when :kyoto
         | 
| 135 133 | 
             
                      at_exit { ::Anystyle::Parser::Dictionary.instance.close }
         | 
| 136 134 |  | 
| 137 135 | 
             
                      create unless File.exists?(path)
         | 
| 138 | 
            -
             | 
| 136 | 
            +
             | 
| 139 137 | 
             
                      @db = KyotoCabinet::DB.new
         | 
| 140 138 | 
             
                      unless @db.open(path, KyotoCabinet::DB::OREADER)
         | 
| 141 139 | 
             
                        raise DictionaryError, "failed to open cabinet file #{path}: #{@db.error}"
         | 
| 142 140 | 
             
                      end
         | 
| 143 | 
            -
             | 
| 141 | 
            +
             | 
| 144 142 | 
             
                    when :redis
         | 
| 145 143 | 
             
                      at_exit { ::Anystyle::Parser::Dictionary.instance.close }
         | 
| 146 144 | 
             
                      @db = Redis.new(options)
         | 
| 147 | 
            -
             | 
| 145 | 
            +
             | 
| 148 146 | 
             
                      populate if @db.dbsize.zero?
         | 
| 149 | 
            -
             | 
| 147 | 
            +
             | 
| 150 148 | 
             
                    else
         | 
| 151 149 | 
             
                      @db = Hash.new(0)
         | 
| 152 150 | 
             
                      populate
         | 
| 153 151 | 
             
                    end
         | 
| 154 | 
            -
             | 
| 152 | 
            +
             | 
| 155 153 | 
             
                    @db
         | 
| 156 154 | 
             
                  end
         | 
| 157 | 
            -
             | 
| 155 | 
            +
             | 
| 158 156 | 
             
                  def open?; !!@db; end
         | 
| 159 | 
            -
             | 
| 157 | 
            +
             | 
| 160 158 | 
             
                  def close
         | 
| 161 159 | 
             
                    case
         | 
| 162 160 | 
             
                    when @db.respond_to?(:close)
         | 
| 163 | 
            -
                      @db.close | 
| 161 | 
            +
                      @db.close
         | 
| 164 162 | 
             
                    when @db.respond_to?(:quit)
         | 
| 165 163 | 
             
                      @db.quit
         | 
| 166 164 | 
             
                    end
         | 
| 167 | 
            -
             | 
| 165 | 
            +
             | 
| 168 166 | 
             
                    @db = nil
         | 
| 169 167 | 
             
                  end
         | 
| 170 | 
            -
             | 
| 168 | 
            +
             | 
| 171 169 | 
             
                  def path
         | 
| 172 170 | 
             
                    case options[:mode]
         | 
| 173 171 | 
             
                    when :kyoto
         | 
| @@ -178,13 +176,13 @@ module Anystyle | |
| 178 176 | 
             
                      'hash'
         | 
| 179 177 | 
             
                    end
         | 
| 180 178 | 
             
                  end
         | 
| 181 | 
            -
             | 
| 179 | 
            +
             | 
| 182 180 | 
             
                  private
         | 
| 183 | 
            -
             | 
| 181 | 
            +
             | 
| 184 182 | 
             
                  def db
         | 
| 185 183 | 
             
                    @db || open
         | 
| 186 184 | 
             
                  end
         | 
| 187 | 
            -
             | 
| 185 | 
            +
             | 
| 188 186 | 
             
                  def populate
         | 
| 189 187 | 
             
                    require 'zlib'
         | 
| 190 188 |  | 
| @@ -193,7 +191,7 @@ module Anystyle | |
| 193 191 |  | 
| 194 192 | 
             
                      Zlib::GzipReader.new(f).each do |line|
         | 
| 195 193 | 
             
                        line.strip!
         | 
| 196 | 
            -
             | 
| 194 | 
            +
             | 
| 197 195 | 
             
                        if line.start_with?('#')
         | 
| 198 196 | 
             
                          case line
         | 
| 199 197 | 
             
                          when /^## male/i
         | 
| @@ -214,7 +212,7 @@ module Anystyle | |
| 214 212 | 
             
                            # skip comments
         | 
| 215 213 | 
             
                          end
         | 
| 216 214 | 
             
                        else
         | 
| 217 | 
            -
                          key | 
| 215 | 
            +
                          key = line.split(/\s+(\d+\.\d+)\s*$/)[0]
         | 
| 218 216 | 
             
                          value = self[key]
         | 
| 219 217 | 
             
                          self[key] = value + mode if value < mode
         | 
| 220 218 | 
             
                        end
         | 
| @@ -222,8 +220,8 @@ module Anystyle | |
| 222 220 | 
             
                    end
         | 
| 223 221 |  | 
| 224 222 | 
             
                  end
         | 
| 225 | 
            -
             | 
| 223 | 
            +
             | 
| 226 224 | 
             
                end
         | 
| 227 | 
            -
             | 
| 225 | 
            +
             | 
| 228 226 | 
             
              end
         | 
| 229 | 
            -
            end
         | 
| 227 | 
            +
            end
         |