harmonious_dictionary 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. data/.gitignore +5 -0
  2. data/.rspec +1 -0
  3. data/CHANGELOG +3 -0
  4. data/MIT-LICENSE +20 -0
  5. data/README.markdown +53 -0
  6. data/Rakefile +8 -0
  7. data/benchmark/benchmark.rb +43 -0
  8. data/benchmark/text_test_100.txt +4 -0
  9. data/benchmark/text_test_1000.txt +25 -0
  10. data/benchmark/text_test_10000.txt +219 -0
  11. data/bin/harmonious_rseg +11 -0
  12. data/bin/harmonious_server +63 -0
  13. data/harmonious_dictionary.gemspec +20 -0
  14. data/lib/generators/harmonious_dictionary/setup/setup_generator.rb +16 -0
  15. data/lib/generators/harmonious_dictionary/setup/templates/chinese_dictionary.txt +0 -0
  16. data/lib/generators/harmonious_dictionary/setup/templates/english_dictionary.txt +0 -0
  17. data/lib/generators/harmonious_dictionary/setup/templates/remote_server.yml +8 -0
  18. data/lib/harmonious_dictionary.rb +48 -0
  19. data/lib/harmonious_dictionary/app.rb +18 -0
  20. data/lib/harmonious_dictionary/engines/dict.rb +51 -0
  21. data/lib/harmonious_dictionary/engines/engine.rb +21 -0
  22. data/lib/harmonious_dictionary/engines/english.rb +27 -0
  23. data/lib/harmonious_dictionary/filters/conjunction.rb +11 -0
  24. data/lib/harmonious_dictionary/filters/fullwidth.rb +21 -0
  25. data/lib/harmonious_dictionary/filters/symbol.rb +16 -0
  26. data/lib/harmonious_dictionary/model_additions.rb +15 -0
  27. data/lib/harmonious_dictionary/railtie.rb +23 -0
  28. data/lib/harmonious_dictionary/rseg.rb +170 -0
  29. data/lib/harmonious_dictionary/version.rb +3 -0
  30. data/lib/tasks/generate_dictionary.rake +55 -0
  31. data/spec/harmonious_dictionary_spec.rb +40 -0
  32. data/spec/model_additions_spec.rb +57 -0
  33. data/spec/spec_helper.rb +21 -0
  34. metadata +99 -0
@@ -0,0 +1,3 @@
1
# Top-level namespace of the harmonious_dictionary gem.
module HarmoniousDictionary
  # Version string of the gem; frozen so it cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,55 @@
1
+ require 'yaml'
2
+
3
# Rake task `harmonious_dictionary:generate`.
#
# Reads the two word lists under config/harmonious_dictionary in the Rails
# app and writes the generated lookup files next to them:
# * the Chinese list is turned into a nested-hash tree by `process` and
#   written with Marshal to `hash_path`;
# * the English list is collected into an array by `process_english_words`
#   and written as YAML to `yaml_path`.
namespace :harmonious_dictionary do
  desc "generate harmonious dictionary for use"
  task :generate => :environment do
    source_dir     = File.join(Rails.root, 'config', 'harmonious_dictionary')
    chinese_source = File.join(source_dir, 'chinese_dictionary.txt')
    english_source = File.join(source_dir, 'english_dictionary.txt')

    puts "Processing chinese words..."
    word_tree = {}
    process(chinese_source, word_tree)
    File.open(hash_path, "wb") do |io|
      Marshal.dump(word_tree, io)
    end
    puts 'Done'

    puts 'Processing english words...'
    english_words = []
    process_english_words(english_source, english_words)
    File.open(yaml_path, "wb") do |io|
      YAML::dump(english_words, io)
    end
    puts 'Done'
  end
end
22
+
23
# Appends every line of the file at +path+ to +list+, without its line ending.
#
# path - path of a text file holding one word per line.
# list - Array that receives the words; it is mutated in place.
#
# Uses String#chomp rather than gsub!: gsub! returns nil when the line has no
# "\n" (typically the last line of a file without a trailing newline), which
# used to push nil into the list. chomp also strips a "\r\n" ending.
def process_english_words(path, list)
  File.open(path, 'r') do |file|
    file.each_line { |line| list << line.chomp }
  end
end
28
+
29
# Inserts every word of the file at +path+ into the nested-hash tree +tree+.
#
# path - path of a text file holding one word per line.
# tree - Hash that is mutated in place. Each character of a word becomes a
#        key one level deeper, and the hash reached after the last character
#        gets the marker :end => true.
#
# "\r" and "\n" characters are ignored. Lines that contain nothing else are
# skipped; previously such a line left the current node nil and raised
# NoMethodError on `node[:end] = true`.
def process(path, tree)
  File.open(path, 'r') do |file|
    file.each_line do |line|
      word = line.delete("\r\n")
      next if word.empty?

      # Walk down from the root, creating child hashes as needed.
      leaf = word.each_char.inject(tree) { |node, c| node[c] ||= {} }
      leaf[:end] = true
    end
  end
end
47
+
48
# Path of the Marshal file for the Chinese dictionary, located under
# config/harmonious_dictionary of the Rails application root.
def hash_path
  segments = %w[config harmonious_dictionary harmonious.hash]
  File.join(Rails.root, *segments)
end
51
+
52
# Path of the YAML file for the English dictionary, located under
# config/harmonious_dictionary of the Rails application root.
def yaml_path
  segments = %w[config harmonious_dictionary harmonious_english.yml]
  File.join(Rails.root, *segments)
end
55
+
@@ -0,0 +1,40 @@
1
+ # encoding: utf-8
2
+
3
+ require 'spec_helper'
4
+
5
# Specs for the module-level API of HarmoniousDictionary:
# harmonious_words, clean?, clean and clean_word_basic.
describe HarmoniousDictionary do

  describe 'local' do
    describe 'segment' do
      it 'should return harmonious word for sentence' do
        HarmoniousDictionary.harmonious_words('戴秉国在中国').should == ['戴秉国']
      end

      # Mixed input: Chinese text, English words and a URL in one sentence.
      it 'should return english,url and chinese words' do
        HarmoniousDictionary.harmonious_words('戴秉国 in china,watch cctv.com.let fuck it').should == ['戴秉国','fuck']
      end
    end

    it 'should find harmonious chinese words' do
      HarmoniousDictionary.clean?('李源潮在中国').should == false
    end

    it 'should pass good words' do
      HarmoniousDictionary.clean?('过去').should == true
    end

    it 'should clean sentence by replace harmonious words by *' do
      HarmoniousDictionary.clean('戴秉国在中国').should == '***在中国'
    end

    it 'should replace harmonious with *' do
      HarmoniousDictionary.clean_word_basic('大米').should == '**'
    end
  end

  # Remote-server specs are currently disabled.
  # describe 'use remote' do
  #   it 'should use remote server for segment' do
  #     HarmoniousDictionary.clean_by_remote('戴秉国在中国').should == '***在中国'
  #   end
  # end
end
@@ -0,0 +1,57 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
# The specs run against an in-memory SQLite database.
ActiveRecord::Base.establish_connection(adapter: "sqlite3", database: ":memory:")

# Minimal schema: a single posts table with the three columns the specs fill.
ActiveRecord::Schema.define(version: 1) do
  create_table(:posts) do |table|
    table.string :title
    table.text   :body
    table.text   :note
  end
end
13
+
14
# ActiveRecord model used as the fixture for the validation specs. The
# title and body attributes are registered with validate_harmonious_of;
# note is not.
class Post < ActiveRecord::Base
  extend HarmoniousDictionary::ModelAdditions

  validate_harmonious_of(:title, :body)
end
18
+
19
# Specs for the validate_harmonious_of model macro, exercised through Post.
describe HarmoniousDictionary::ModelAdditions do
  # Record whose title, body and note all contain a sensitive word.
  let(:post) { Post.create title:'戴秉国在中国',body:'戴秉国在中国',note:'戴秉国在中国' }

  describe 'use local' do
    it 'should validate for harmonious' do
      post.errors[:title].should == ['不能含有敏感词']
    end

    it 'should have error on body' do
      post.errors[:body].should == ['不能含有敏感词']
    end

    it 'should allow empty input value' do
      # Assert on the record created without a title. The previous version
      # stored it in `p` (shadowing Kernel#p) and then inspected `post`.
      untitled = Post.create body:'戴秉国在中国',note:'戴秉国在中国'
      untitled.errors[:title].should == []
      untitled.errors[:body].should == ['不能含有敏感词']
    end

    it 'should filter! any harmonious words' do
      HarmoniousDictionary.clean(post.body).should == '***在中国'
    end
  end

  # To be enabled later.
  # describe 'use remote' do
  #   before(:each) do
  #     configuration = double("configuration")
  #     @double_harmonious_dictionary = double('harmonious_dictionary')
  #     configuration.stub(:harmonious_dictionary){ @double_harmonious_dictionary }
  #     @double_harmonious_dictionary.stub(:use_remote_server){ true }
  #     Rails.stub(:configuration){configuration}
  #   end

  #   it 'should validate for harmonious' do
  #     HarmoniousDictionary.should_receive(:clean_by_remote?)
  #     post.errors[:title].should == ['不能含有敏感词']
  #   end
  # end
end
57
+
@@ -0,0 +1,21 @@
1
+ require 'rubygems'
2
+
3
+ require 'rails'
4
+ require 'active_model'
5
+ require 'active_record'
6
+ require 'harmonious_dictionary'
7
+
8
# Output settings for the spec suite: coloured, documentation-style output.
RSpec.configure do |config|
  # `color_enabled=` was removed in RSpec 3 in favour of `color=`; use
  # whichever setter the installed RSpec provides.
  if config.respond_to?(:color=)
    config.color = true
  else
    config.color_enabled = true
  end
  config.formatter = 'documentation'
end
12
+
13
# Test stub: reopens Rails and overrides its root and env class methods.
module Rails
  class << self
    # Parent of the directory containing this file, expressed as "<dir>/../".
    def root
      here = File.dirname(__FILE__)
      File.join(here, '../')
    end

    # Fixed environment name for the suite.
    def env
      'test'
    end
  end
end
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: harmonious_dictionary
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Stephen Kong
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-03 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: 和谐宝典用于检查输入是否包含中文或英文敏感词,并可替换为特殊字符。速度比常规的正则匹配要快10倍以上。生活在天朝,和谐宝典必须人手必备。
31
+ email:
32
+ - wear63659220@gmail.com
33
+ executables:
34
+ - harmonious_rseg
35
+ - harmonious_server
36
+ extensions: []
37
+ extra_rdoc_files: []
38
+ files:
39
+ - .gitignore
40
+ - .rspec
41
+ - CHANGELOG
42
+ - MIT-LICENSE
43
+ - README.markdown
44
+ - Rakefile
45
+ - benchmark/benchmark.rb
46
+ - benchmark/text_test_100.txt
47
+ - benchmark/text_test_1000.txt
48
+ - benchmark/text_test_10000.txt
49
+ - bin/harmonious_rseg
50
+ - bin/harmonious_server
51
+ - harmonious_dictionary.gemspec
52
+ - lib/generators/harmonious_dictionary/setup/setup_generator.rb
53
+ - lib/generators/harmonious_dictionary/setup/templates/chinese_dictionary.txt
54
+ - lib/generators/harmonious_dictionary/setup/templates/english_dictionary.txt
55
+ - lib/generators/harmonious_dictionary/setup/templates/remote_server.yml
56
+ - lib/harmonious_dictionary.rb
57
+ - lib/harmonious_dictionary/app.rb
58
+ - lib/harmonious_dictionary/engines/dict.rb
59
+ - lib/harmonious_dictionary/engines/engine.rb
60
+ - lib/harmonious_dictionary/engines/english.rb
61
+ - lib/harmonious_dictionary/filters/conjunction.rb
62
+ - lib/harmonious_dictionary/filters/fullwidth.rb
63
+ - lib/harmonious_dictionary/filters/symbol.rb
64
+ - lib/harmonious_dictionary/model_additions.rb
65
+ - lib/harmonious_dictionary/railtie.rb
66
+ - lib/harmonious_dictionary/rseg.rb
67
+ - lib/harmonious_dictionary/version.rb
68
+ - lib/tasks/generate_dictionary.rake
69
+ - spec/harmonious_dictionary_spec.rb
70
+ - spec/model_additions_spec.rb
71
+ - spec/spec_helper.rb
72
+ homepage: https://github.com/wear/harmonious_dictionary
73
+ licenses: []
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ! '>='
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 1.8.24
93
+ signing_key:
94
+ specification_version: 3
95
+ summary: filter any words that need to be harmonized
96
+ test_files:
97
+ - spec/harmonious_dictionary_spec.rb
98
+ - spec/model_additions_spec.rb
99
+ - spec/spec_helper.rb