harmonious_dictionary 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. data/.gitignore +5 -0
  2. data/.rspec +1 -0
  3. data/CHANGELOG +3 -0
  4. data/MIT-LICENSE +20 -0
  5. data/README.markdown +53 -0
  6. data/Rakefile +8 -0
  7. data/benchmark/benchmark.rb +43 -0
  8. data/benchmark/text_test_100.txt +4 -0
  9. data/benchmark/text_test_1000.txt +25 -0
  10. data/benchmark/text_test_10000.txt +219 -0
  11. data/bin/harmonious_rseg +11 -0
  12. data/bin/harmonious_server +63 -0
  13. data/harmonious_dictionary.gemspec +20 -0
  14. data/lib/generators/harmonious_dictionary/setup/setup_generator.rb +16 -0
  15. data/lib/generators/harmonious_dictionary/setup/templates/chinese_dictionary.txt +0 -0
  16. data/lib/generators/harmonious_dictionary/setup/templates/english_dictionary.txt +0 -0
  17. data/lib/generators/harmonious_dictionary/setup/templates/remote_server.yml +8 -0
  18. data/lib/harmonious_dictionary.rb +48 -0
  19. data/lib/harmonious_dictionary/app.rb +18 -0
  20. data/lib/harmonious_dictionary/engines/dict.rb +51 -0
  21. data/lib/harmonious_dictionary/engines/engine.rb +21 -0
  22. data/lib/harmonious_dictionary/engines/english.rb +27 -0
  23. data/lib/harmonious_dictionary/filters/conjunction.rb +11 -0
  24. data/lib/harmonious_dictionary/filters/fullwidth.rb +21 -0
  25. data/lib/harmonious_dictionary/filters/symbol.rb +16 -0
  26. data/lib/harmonious_dictionary/model_additions.rb +15 -0
  27. data/lib/harmonious_dictionary/railtie.rb +23 -0
  28. data/lib/harmonious_dictionary/rseg.rb +170 -0
  29. data/lib/harmonious_dictionary/version.rb +3 -0
  30. data/lib/tasks/generate_dictionary.rake +55 -0
  31. data/spec/harmonious_dictionary_spec.rb +40 -0
  32. data/spec/model_additions_spec.rb +57 -0
  33. data/spec/spec_helper.rb +21 -0
  34. metadata +99 -0
@@ -0,0 +1,3 @@
1
# Top-level namespace of the harmonious_dictionary gem.
module HarmoniousDictionary
  # Release number of the gem.
  VERSION = '0.0.1'
end
@@ -0,0 +1,55 @@
1
+ require 'yaml'
2
+
3
namespace :harmonious_dictionary do
  desc "generate harmonious dictionary for use"
  # Builds both lookup files from the plain-text word lists found under
  # config/harmonious_dictionary of the Rails application:
  #   * chinese_dictionary.txt -> nested-hash tree, marshalled to hash_path
  #   * english_dictionary.txt -> array of words, dumped as YAML to yaml_path
  task :generate => :environment do
    dictionary_dir = File.join(Rails.root, 'config', 'harmonious_dictionary')
    chinese_dictionary_path = File.join(dictionary_dir, 'chinese_dictionary.txt')
    english_dictionary_path = File.join(dictionary_dir, 'english_dictionary.txt')

    puts "Processing chinese words..."
    tree = {}
    process(chinese_dictionary_path, tree)
    File.open(hash_path, "wb") do |io|
      Marshal.dump(tree, io)
    end
    puts 'Done'

    puts 'Processing english words...'
    english_dictionary = []
    process_english_words(english_dictionary_path, english_dictionary)
    File.open(yaml_path, "wb") do |io|
      YAML.dump(english_dictionary, io)
    end
    puts 'Done'
  end
end
22
+
23
# Reads the word list at +path+ (one word per line) and appends every line,
# without its line terminator, to +list+.
#
# String#chomp is used instead of gsub!("\n", '') because gsub! returns nil
# when nothing was replaced, which pushed nil into the list for a final line
# that has no trailing newline. chomp also drops the "\r" of CRLF files.
#
# path - String path of the dictionary file.
# list - Array that receives the words (mutated in place).
#
# Returns +list+.
def process_english_words(path, list)
  File.open(path, 'r') do |file|
    file.each_line { |line| list << line.chomp }
  end
  list
end
28
+
29
# Reads the word list at +path+ (one word per line) and inserts every word
# into +tree+, a trie made of nested hashes: each character is a key whose
# value is the hash of the characters that may follow it, and the hash
# reached after the last character of a word carries :end => true.
#
# "\r" and "\n" characters are never stored. Lines that contain nothing else
# are skipped; previously such a blank line raised NoMethodError because
# there was no node on which to set :end.
#
# path - String path of the dictionary file.
# tree - Hash that receives the words (mutated in place).
#
# Returns +tree+.
def process(path, tree)
  File.open(path, 'r') do |file|
    file.each_line do |line|
      word = line.delete("\r\n")
      next if word.empty?

      # Walk down from the root, creating missing nodes on the way.
      leaf = word.each_char.inject(tree) { |node, char| node[char] ||= {} }
      leaf[:end] = true
    end
  end
  tree
end
47
+
48
# Location of the marshalled dictionary file inside the application's
# config/harmonious_dictionary directory.
def hash_path
  File.join(Rails.root, *%w[config harmonious_dictionary harmonious.hash])
end
51
+
52
# Location of the YAML English word list inside the application's
# config/harmonious_dictionary directory.
def yaml_path
  File.join(Rails.root, *%w[config harmonious_dictionary harmonious_english.yml])
end
55
+
@@ -0,0 +1,40 @@
1
+ # encoding: utf-8
2
+
3
+ require 'spec_helper'
4
+
5
# Specs for the module-level API: harmonious_words, clean?, clean and
# clean_word_basic, all run against the local dictionary.
describe HarmoniousDictionary do
  describe 'local' do
    describe 'segment' do
      it 'should return harmonious word for sentence' do
        found = described_class.harmonious_words('戴秉国在中国')
        found.should == ['戴秉国']
      end

      it 'should return english,url and chiese words' do
        found = described_class.harmonious_words('戴秉国 in china,watch cctv.com.let fuck it')
        found.should == ['戴秉国', 'fuck']
      end
    end

    it 'should find harmonious chinese words' do
      described_class.clean?('李源潮在中国').should == false
    end

    it 'should pass good words' do
      described_class.clean?('过去').should == true
    end

    it 'should clean sentence by replace harmonious words by *' do
      cleaned = described_class.clean('戴秉国在中国')
      cleaned.should == '***在中国'
    end

    it 'should replace harmonious with *' do
      described_class.clean_word_basic('大米').should == '**'
    end
  end

  # Remote-server example, currently disabled:
  # describe 'use remote' do
  #   it 'should use remote server for segment' do
  #     HarmoniousDictionary.clean_by_remote('戴秉国在中国').should == '***在中国'
  #   end
  # end
end
@@ -0,0 +1,57 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
# The specs run against a throw-away in-memory SQLite database.
ActiveRecord::Base.establish_connection(adapter: "sqlite3", database: ":memory:")

# Single table backing the Post model used below.
ActiveRecord::Schema.define(version: 1) do
  create_table(:posts) do |table|
    table.string :title
    table.text :body
    table.text :note
  end
end

# Model under test: only :title and :body are passed to
# validate_harmonious_of; :note is left out.
class Post < ActiveRecord::Base
  extend HarmoniousDictionary::ModelAdditions

  validate_harmonious_of :title, :body
end
18
+
19
# Specs for the validation macro added by HarmoniousDictionary::ModelAdditions,
# exercised through the Post model.
describe HarmoniousDictionary::ModelAdditions do
  # A post whose three text columns all hold the same sensitive sentence.
  let(:post) { Post.create title: '戴秉国在中国', body: '戴秉国在中国', note: '戴秉国在中国' }

  describe 'use local' do
    it 'should validate for harmonious' do
      post.errors[:title].should == ['不能含有敏感词']
    end

    it 'should have error on body' do
      post.errors[:body].should == ['不能含有敏感词']
    end

    it 'should allow empty input value' do
      # Assert on the record created without a title; the example used to
      # inspect the unrelated `post` fixture, so the nil title was never
      # exercised by the expectation.
      untitled = Post.create body: '戴秉国在中国', note: '戴秉国在中国'
      untitled.errors[:body].should == ['不能含有敏感词']
    end

    it 'should filter! any harmonious words' do
      HarmoniousDictionary.clean(post.body).should == '***在中国'
    end
  end

  # To be enabled later.
  # describe 'use remote' do
  #   before(:each) do
  #     configuration = double("configuration")
  #     @double_harmonious_dictionary = double('harmonious_dictionary')
  #     configuration.stub(:harmonious_dictionary){ @double_harmonious_dictionary }
  #     @double_harmonious_dictionary.stub(:use_remote_server){ true }
  #     Rails.stub(:configuration){configuration}
  #   end

  #   it 'should validate for harmonious' do
  #     HarmoniousDictionary.should_receive(:clean_by_remote?)
  #     post.errors[:title].should == ['不能含有敏感词']
  #   end
  # end
end
57
+
@@ -0,0 +1,21 @@
1
+ require 'rubygems'
2
+
3
+ require 'rails'
4
+ require 'active_model'
5
+ require 'active_record'
6
+ require 'harmonious_dictionary'
7
+
8
# Coloured output with the 'documentation' formatter for every spec run.
RSpec.configure do |rspec|
  rspec.color_enabled = true
  rspec.formatter = 'documentation'
end
12
+
13
# Defines Rails.root and Rails.env for the spec run.
module Rails
  class << self
    # Parent of the directory holding this file, written as "<dir>/../".
    def root
      File.join(File.dirname(__FILE__), '../')
    end

    # Environment name reported to the code under test.
    def env
      'test'
    end
  end
end
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: harmonious_dictionary
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Stephen Kong
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-03 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: 和谐宝典用于检查输入是否包含中文或英文敏感词,并可替换为特殊字符。速度比常规的正则匹配要快10倍以上。生活在天朝,和谐宝典必须人手必备。
31
+ email:
32
+ - wear63659220@gmail.com
33
+ executables:
34
+ - harmonious_rseg
35
+ - harmonious_server
36
+ extensions: []
37
+ extra_rdoc_files: []
38
+ files:
39
+ - .gitignore
40
+ - .rspec
41
+ - CHANGELOG
42
+ - MIT-LICENSE
43
+ - README.markdown
44
+ - Rakefile
45
+ - benchmark/benchmark.rb
46
+ - benchmark/text_test_100.txt
47
+ - benchmark/text_test_1000.txt
48
+ - benchmark/text_test_10000.txt
49
+ - bin/harmonious_rseg
50
+ - bin/harmonious_server
51
+ - harmonious_dictionary.gemspec
52
+ - lib/generators/harmonious_dictionary/setup/setup_generator.rb
53
+ - lib/generators/harmonious_dictionary/setup/templates/chinese_dictionary.txt
54
+ - lib/generators/harmonious_dictionary/setup/templates/english_dictionary.txt
55
+ - lib/generators/harmonious_dictionary/setup/templates/remote_server.yml
56
+ - lib/harmonious_dictionary.rb
57
+ - lib/harmonious_dictionary/app.rb
58
+ - lib/harmonious_dictionary/engines/dict.rb
59
+ - lib/harmonious_dictionary/engines/engine.rb
60
+ - lib/harmonious_dictionary/engines/english.rb
61
+ - lib/harmonious_dictionary/filters/conjunction.rb
62
+ - lib/harmonious_dictionary/filters/fullwidth.rb
63
+ - lib/harmonious_dictionary/filters/symbol.rb
64
+ - lib/harmonious_dictionary/model_additions.rb
65
+ - lib/harmonious_dictionary/railtie.rb
66
+ - lib/harmonious_dictionary/rseg.rb
67
+ - lib/harmonious_dictionary/version.rb
68
+ - lib/tasks/generate_dictionary.rake
69
+ - spec/harmonious_dictionary_spec.rb
70
+ - spec/model_additions_spec.rb
71
+ - spec/spec_helper.rb
72
+ homepage: https://github.com/wear/harmonious_dictionary
73
+ licenses: []
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ! '>='
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 1.8.24
93
+ signing_key:
94
+ specification_version: 3
95
+ summary: filter any words that need to be harmonized
96
+ test_files:
97
+ - spec/harmonious_dictionary_spec.rb
98
+ - spec/model_additions_spec.rb
99
+ - spec/spec_helper.rb