harmonious_dictionary 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/CHANGELOG +3 -0
- data/MIT-LICENSE +20 -0
- data/README.markdown +53 -0
- data/Rakefile +8 -0
- data/benchmark/benchmark.rb +43 -0
- data/benchmark/text_test_100.txt +4 -0
- data/benchmark/text_test_1000.txt +25 -0
- data/benchmark/text_test_10000.txt +219 -0
- data/bin/harmonious_rseg +11 -0
- data/bin/harmonious_server +63 -0
- data/harmonious_dictionary.gemspec +20 -0
- data/lib/generators/harmonious_dictionary/setup/setup_generator.rb +16 -0
- data/lib/generators/harmonious_dictionary/setup/templates/chinese_dictionary.txt +0 -0
- data/lib/generators/harmonious_dictionary/setup/templates/english_dictionary.txt +0 -0
- data/lib/generators/harmonious_dictionary/setup/templates/remote_server.yml +8 -0
- data/lib/harmonious_dictionary.rb +48 -0
- data/lib/harmonious_dictionary/app.rb +18 -0
- data/lib/harmonious_dictionary/engines/dict.rb +51 -0
- data/lib/harmonious_dictionary/engines/engine.rb +21 -0
- data/lib/harmonious_dictionary/engines/english.rb +27 -0
- data/lib/harmonious_dictionary/filters/conjunction.rb +11 -0
- data/lib/harmonious_dictionary/filters/fullwidth.rb +21 -0
- data/lib/harmonious_dictionary/filters/symbol.rb +16 -0
- data/lib/harmonious_dictionary/model_additions.rb +15 -0
- data/lib/harmonious_dictionary/railtie.rb +23 -0
- data/lib/harmonious_dictionary/rseg.rb +170 -0
- data/lib/harmonious_dictionary/version.rb +3 -0
- data/lib/tasks/generate_dictionary.rake +55 -0
- data/spec/harmonious_dictionary_spec.rb +40 -0
- data/spec/model_additions_spec.rb +57 -0
- data/spec/spec_helper.rb +21 -0
- metadata +99 -0
| @@ -0,0 +1,55 @@ | |
| 1 | 
            +
            require 'yaml'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Rake task that compiles the plain-text word lists under
            # config/harmonious_dictionary into the files the task writes:
            # the Chinese word tree is Marshal-dumped to hash_path and the English
            # word list is YAML-dumped to yaml_path.
            namespace :harmonious_dictionary do
              desc "generate harmonious dictionary for use"
              task generate: :environment do
                dictionary_dir = File.join(Rails.root, 'config', 'harmonious_dictionary')
                chinese_source = File.join(dictionary_dir, 'chinese_dictionary.txt')
                english_source = File.join(dictionary_dir, 'english_dictionary.txt')

                # Chinese words: build the nested-hash tree, then serialize it.
                puts "Processing chinese words..."
                chinese_tree = {}
                process(chinese_source, chinese_tree)
                File.open(hash_path, "wb") { |out| Marshal.dump(chinese_tree, out) }
                puts 'Done'

                # English words: collect one entry per line, then serialize the list.
                puts 'Processing english words...'
                english_words = []
                process_english_words(english_source, english_words)
                File.open(yaml_path, "wb") { |out| YAML.dump(english_words, out) }
                puts 'Done'
              end
            end
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            # Reads the word list at +path+ and appends one entry per line to +list+.
            #
            # The line terminator ("\n" or "\r\n") is stripped from every entry.
            # +chomp+ is used instead of +gsub!+ because +gsub!+ returns nil when it
            # makes no substitution, which pushed nil into the list for a final line
            # that has no trailing newline.
            #
            # @param path [String] path of the plain-text dictionary, one word per line
            # @param list [Array<String>] collector that receives the words (mutated)
            def process_english_words(path, list)
              File.open(path, 'r') do |file|
                file.each_line { |line| list << line.chomp }
              end
            end
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            # Inserts every line of the file at +path+ into +tree+, a nested hash
            # keyed by single characters. The hash reached after the last character
            # of a line is flagged with <tt>:end => true</tt>.
            #
            # "\r" and "\n" characters are ignored. Lines that contain nothing else
            # are skipped: previously such a line left no node to flag and raised
            # NoMethodError on nil.
            #
            # @param path [String] path of the plain-text dictionary, one word per line
            # @param tree [Hash] root of the character tree (mutated in place)
            def process(path, tree)
              File.open(path, 'r') do |file|
                file.each_line do |line|
                  word = line.delete("\r\n")
                  next if word.empty?

                  # Walk down from the root, creating child hashes as needed.
                  leaf = word.each_char.inject(tree) { |node, char| node[char] ||= {} }
                  leaf[:end] = true
                end
              end
            end
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            # Path of the file that receives the Marshal dump of the Chinese word
            # tree: config/harmonious_dictionary/harmonious.hash under Rails.root.
            def hash_path
              File.join(Rails.root, *%w[config harmonious_dictionary harmonious.hash])
            end
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            # Path of the file that receives the YAML dump of the English word
            # list: config/harmonious_dictionary/harmonious_english.yml under Rails.root.
            def yaml_path
              File.join(Rails.root, *%w[config harmonious_dictionary harmonious_english.yml])
            end
         | 
| 55 | 
            +
             | 
| @@ -0,0 +1,40 @@ | |
| 1 | 
            +
            # encoding: utf-8
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'spec_helper'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            # Specs for the module-level API: harmonious_words, clean?, clean and
            # clean_word_basic, all run against the local dictionary.
            describe HarmoniousDictionary do
              describe 'local' do
                describe 'segment' do
                  it 'should return harmonious word for sentence' do
                    found = HarmoniousDictionary.harmonious_words('戴秉国在中国')
                    found.should eq(['戴秉国'])
                  end

                  it 'should return english,url and chiese words' do
                    found = HarmoniousDictionary.harmonious_words('戴秉国 in china,watch cctv.com.let fuck it')
                    found.should eq(['戴秉国', 'fuck'])
                  end
                end

                it 'should find harmonious chinese  words' do
                  HarmoniousDictionary.clean?('李源潮在中国').should eq(false)
                end

                it 'should pass good words' do
                  HarmoniousDictionary.clean?('过去').should eq(true)
                end

                it 'should clean sentence by replace harmonious words by *' do
                  HarmoniousDictionary.clean('戴秉国在中国').should eq('***在中国')
                end

                it 'should replace harmonious with *' do
                  HarmoniousDictionary.clean_word_basic('大米').should eq('**')
                end
              end

              # Remote-server examples are disabled for now.
              # describe 'use remote' do
              #   it 'should use remote server for segment' do
              #     HarmoniousDictionary.clean_by_remote('戴秉国在中国').should == '***在中国'
              #   end
              # end
            end
         | 
| @@ -0,0 +1,57 @@ | |
| 1 | 
            +
            # encoding: utf-8
         | 
| 2 | 
            +
            require 'spec_helper'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Connect to an in-memory SQLite database and define the single posts
            # table (title, body, note) used by the Post model in this spec.
            ActiveRecord::Base.establish_connection(adapter: 'sqlite3', database: ':memory:')

            ActiveRecord::Schema.define(version: 1) do
              create_table :posts do |table|
                table.string :title
                table.text   :body
                table.text   :note
              end
            end
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            # Model under test: extends HarmoniousDictionary::ModelAdditions and
            # passes :title and :body (but not :note) to validate_harmonious_of.
            class Post < ActiveRecord::Base
              extend HarmoniousDictionary::ModelAdditions

              validate_harmonious_of(:title, :body)
            end
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            # Specs for the validation macro provided by
            # HarmoniousDictionary::ModelAdditions, exercised through the Post model.
            describe HarmoniousDictionary::ModelAdditions do
              # A post whose title, body and note all contain a dictionary word.
              let(:post) { Post.create title:'戴秉国在中国',body:'戴秉国在中国',note:'戴秉国在中国' }

              describe 'use local' do
                it 'should validate for harmonious' do
                  post.errors[:title].should == ['不能含有敏感词']
                end

                it 'should have error on body' do
                  post.errors[:body].should == ['不能含有敏感词']
                end

                it 'should allow empty input value' do
                  # Assert on the record built WITHOUT a title; the previous version
                  # created it but then inspected the unrelated `post` from `let`.
                  untitled = Post.create body:'戴秉国在中国',note:'戴秉国在中国'
                  # NOTE(review): assumes a nil title is treated as clean by the
                  # validator -- confirm against ModelAdditions.
                  untitled.errors[:title].should == []
                  untitled.errors[:body].should == ['不能含有敏感词']
                end

                it 'should filter! any harmonious words' do
                  HarmoniousDictionary.clean(post.body).should == '***在中国'
                end
              end

              # To be enabled later.
              # describe 'use remote' do
              #   before(:each) do
              #     configuration = double("configuration")
              #     @double_harmonious_dictionary = double('harmonious_dictionary')
              #     configuration.stub(:harmonious_dictionary){ @double_harmonious_dictionary }
              #     @double_harmonious_dictionary.stub(:use_remote_server){ true }
              #     Rails.stub(:configuration){configuration}
              #   end

              #   it 'should validate for harmonious' do
              #     HarmoniousDictionary.should_receive(:clean_by_remote?)
              #     post.errors[:title].should == ['不能含有敏感词']
              #   end
              # end
            end
         | 
| 57 | 
            +
             | 
    
        data/spec/spec_helper.rb
    ADDED
    
    | @@ -0,0 +1,21 @@ | |
| 1 | 
            +
            require 'rubygems'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'rails'
         | 
| 4 | 
            +
            require 'active_model'
         | 
| 5 | 
            +
            require 'active_record'
         | 
| 6 | 
            +
            require 'harmonious_dictionary'
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            # RSpec output settings: documentation formatter with coloured output.
            RSpec.configure do |rspec|
              rspec.formatter     = 'documentation'
              rspec.color_enabled = true
            end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            # Test stub for the two Rails singleton methods defined here:
            # root (the directory above this spec directory) and env.
            module Rails
              class << self
                # Parent of the directory containing this file, returned as a String
                # ending in '../'.
                def root
                  File.join(File.dirname(__FILE__), '../')
                end

                # The specs always run in the 'test' environment.
                def env
                  'test'
                end
              end
            end
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,99 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification
         | 
| 2 | 
            +
            name: harmonious_dictionary
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            +
              version: 0.0.1
         | 
| 5 | 
            +
              prerelease: 
         | 
| 6 | 
            +
            platform: ruby
         | 
| 7 | 
            +
            authors:
         | 
| 8 | 
            +
            - Stephen Kong
         | 
| 9 | 
            +
            autorequire: 
         | 
| 10 | 
            +
            bindir: bin
         | 
| 11 | 
            +
            cert_chain: []
         | 
| 12 | 
            +
            date: 2012-12-03 00:00:00.000000000 Z
         | 
| 13 | 
            +
            dependencies:
         | 
| 14 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 15 | 
            +
              name: rspec
         | 
| 16 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 17 | 
            +
                none: false
         | 
| 18 | 
            +
                requirements:
         | 
| 19 | 
            +
                - - ! '>='
         | 
| 20 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 21 | 
            +
                    version: '0'
         | 
| 22 | 
            +
              type: :development
         | 
| 23 | 
            +
              prerelease: false
         | 
| 24 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 25 | 
            +
                none: false
         | 
| 26 | 
            +
                requirements:
         | 
| 27 | 
            +
                - - ! '>='
         | 
| 28 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 29 | 
            +
                    version: '0'
         | 
| 30 | 
            +
            description: 和谐宝典用于检查输入是否包含中文或英文敏感词,并可替换为特殊字符。速度比常规的正则匹配要快10倍以上。生活在天朝,和谐宝典必须人手必备。
         | 
| 31 | 
            +
            email:
         | 
| 32 | 
            +
            - wear63659220@gmail.com
         | 
| 33 | 
            +
            executables:
         | 
| 34 | 
            +
            - harmonious_rseg
         | 
| 35 | 
            +
            - harmonious_server
         | 
| 36 | 
            +
            extensions: []
         | 
| 37 | 
            +
            extra_rdoc_files: []
         | 
| 38 | 
            +
            files:
         | 
| 39 | 
            +
            - .gitignore
         | 
| 40 | 
            +
            - .rspec
         | 
| 41 | 
            +
            - CHANGELOG
         | 
| 42 | 
            +
            - MIT-LICENSE
         | 
| 43 | 
            +
            - README.markdown
         | 
| 44 | 
            +
            - Rakefile
         | 
| 45 | 
            +
            - benchmark/benchmark.rb
         | 
| 46 | 
            +
            - benchmark/text_test_100.txt
         | 
| 47 | 
            +
            - benchmark/text_test_1000.txt
         | 
| 48 | 
            +
            - benchmark/text_test_10000.txt
         | 
| 49 | 
            +
            - bin/harmonious_rseg
         | 
| 50 | 
            +
            - bin/harmonious_server
         | 
| 51 | 
            +
            - harmonious_dictionary.gemspec
         | 
| 52 | 
            +
            - lib/generators/harmonious_dictionary/setup/setup_generator.rb
         | 
| 53 | 
            +
            - lib/generators/harmonious_dictionary/setup/templates/chinese_dictionary.txt
         | 
| 54 | 
            +
            - lib/generators/harmonious_dictionary/setup/templates/english_dictionary.txt
         | 
| 55 | 
            +
            - lib/generators/harmonious_dictionary/setup/templates/remote_server.yml
         | 
| 56 | 
            +
            - lib/harmonious_dictionary.rb
         | 
| 57 | 
            +
            - lib/harmonious_dictionary/app.rb
         | 
| 58 | 
            +
            - lib/harmonious_dictionary/engines/dict.rb
         | 
| 59 | 
            +
            - lib/harmonious_dictionary/engines/engine.rb
         | 
| 60 | 
            +
            - lib/harmonious_dictionary/engines/english.rb
         | 
| 61 | 
            +
            - lib/harmonious_dictionary/filters/conjunction.rb
         | 
| 62 | 
            +
            - lib/harmonious_dictionary/filters/fullwidth.rb
         | 
| 63 | 
            +
            - lib/harmonious_dictionary/filters/symbol.rb
         | 
| 64 | 
            +
            - lib/harmonious_dictionary/model_additions.rb
         | 
| 65 | 
            +
            - lib/harmonious_dictionary/railtie.rb
         | 
| 66 | 
            +
            - lib/harmonious_dictionary/rseg.rb
         | 
| 67 | 
            +
            - lib/harmonious_dictionary/version.rb
         | 
| 68 | 
            +
            - lib/tasks/generate_dictionary.rake
         | 
| 69 | 
            +
            - spec/harmonious_dictionary_spec.rb
         | 
| 70 | 
            +
            - spec/model_additions_spec.rb
         | 
| 71 | 
            +
            - spec/spec_helper.rb
         | 
| 72 | 
            +
            homepage: https://github.com/wear/harmonious_dictionary
         | 
| 73 | 
            +
            licenses: []
         | 
| 74 | 
            +
            post_install_message: 
         | 
| 75 | 
            +
            rdoc_options: []
         | 
| 76 | 
            +
            require_paths:
         | 
| 77 | 
            +
            - lib
         | 
| 78 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 79 | 
            +
              none: false
         | 
| 80 | 
            +
              requirements:
         | 
| 81 | 
            +
              - - ! '>='
         | 
| 82 | 
            +
                - !ruby/object:Gem::Version
         | 
| 83 | 
            +
                  version: '0'
         | 
| 84 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 85 | 
            +
              none: false
         | 
| 86 | 
            +
              requirements:
         | 
| 87 | 
            +
              - - ! '>='
         | 
| 88 | 
            +
                - !ruby/object:Gem::Version
         | 
| 89 | 
            +
                  version: '0'
         | 
| 90 | 
            +
            requirements: []
         | 
| 91 | 
            +
            rubyforge_project: 
         | 
| 92 | 
            +
            rubygems_version: 1.8.24
         | 
| 93 | 
            +
            signing_key: 
         | 
| 94 | 
            +
            specification_version: 3
         | 
| 95 | 
            +
            summary: filter any words that need to be harmonized
         | 
| 96 | 
            +
            test_files:
         | 
| 97 | 
            +
            - spec/harmonious_dictionary_spec.rb
         | 
| 98 | 
            +
            - spec/model_additions_spec.rb
         | 
| 99 | 
            +
            - spec/spec_helper.rb
         |