filter_word 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
+ 6-4tianwang
2
+ 89-64cdjp
3
+ ADMIN
4
+ Administrator
5
+ asshole
6
+ BLOWJOB
7
+ chinaliberal
8
+ chinamz
9
+ chinesenewsnet
10
+ Clockgemstone
11
+ creaders
12
+ Crestbone
13
+ dajiyuan
14
+ dfdz
15
+ DICK
16
+ falun
17
+ falundafa
18
+ Feelmistone
19
+ freechina
20
+ freenet
21
+ fuck
22
+ gcd
23
+ Gruepin
24
+ Guichuideng
25
+ HACKING
26
+ hongzhi
27
+ hrichina
28
+ HUANET
29
+ hypermart.net
30
+ incest
31
+ jiangdongriji
32
+ jiaochuang
33
+ jiaochun
34
+ KEFU
35
+ KISSMYASS
36
+ lihongzhi
37
+ minghui
38
+ minghuinews
39
+ nacb
40
+ Neckromancer
41
+ NMIS
42
+ PAPER64
43
+ penis
44
+ qiangjian
45
+ renminbao
46
+ renmingbao
47
+ SHIT
48
+ SUCKPENIS
49
+ taip
50
+ tibetalk
51
+ triangle
52
+ triangleboy
53
+ Tringel
54
+ UltraSurf
55
+ ustibet
56
+ voachinese
57
+ wangce
58
+ WEBZEN
59
+ wstaiji
60
+ xinsheng
61
+ YUMING
62
+ zangdu
63
+ ZHENGJIAN
64
+ ZHENGJIANWANG
65
+ ZHENSHANREN
66
+ zhuanfalun
@@ -0,0 +1,67 @@
1
+ require 'yaml'
2
+
3
namespace :filter_word do
  desc "generate harmonious dictionary for use"
  # Compiles the plain-text word lists under config/filter_word into the two
  # lookup files: the Chinese list becomes a Marshal-dumped character tree
  # (built by `process`), the English list becomes a YAML array (built by
  # `process_english_words`). When ENV['model'] is set, every input and
  # output file name is prefixed with "<model>_".
  task :generate => :environment do
    puts "Processing chinese words..."
    model = ENV['model']
    prefix = model.nil? ? '' : "#{model}_"
    dictionary_dir = File.join(Rails.root, 'config', 'filter_word')
    chinese_dictionary_path = File.join(dictionary_dir, "#{prefix}chinese_dictionary.txt")
    english_dictionary_path = File.join(dictionary_dir, "#{prefix}english_dictionary.txt")

    # Chinese words: character tree serialized with Marshal.
    tree = {}
    process(chinese_dictionary_path, tree)
    File.open(hash_path(model), "wb") { |io| Marshal.dump(tree, io) }
    puts 'chinese_dictionary hash Done'

    # English words: flat list serialized as YAML.
    puts 'Processing english words...'
    english_dictionary_list = []
    process_english_words(english_dictionary_path, english_dictionary_list)
    File.open(yaml_path(model), "wb") { |io| YAML::dump(english_dictionary_list, io) }
    puts 'english_dictionary yaml Done'
  end
end
27
+
28
# Reads the English dictionary at +path+ and appends one word per line to +list+.
#
# The line terminator is removed with String#chomp rather than gsub!: gsub!
# returns nil when it has nothing to replace, so a last line without a
# trailing newline used to be appended as nil. chomp also strips "\r\n",
# which matches how `process` ignores both "\n" and "\r". Blank lines are
# skipped so the list never contains an empty entry.
#
# @param path [String] location of the dictionary file, one word per line
# @param list [Array<String>] collection the words are appended to (mutated)
def process_english_words(path, list)
  File.open(path, 'r') do |file|
    file.each_line do |line|
      word = line.chomp
      list << word unless word.empty?
    end
  end
end
33
+
34
# Loads the dictionary at +path+ into +tree+, a nested hash keyed by single
# characters. Each line contributes one path through the tree, and the hash
# reached after the line's last character is flagged with `:end => true`.
#
# "\n" and "\r" are never stored. A line that holds nothing else (a blank
# line) is skipped: it used to leave the cursor at nil and raise
# NoMethodError on the `:end` assignment, aborting the whole run.
#
# @param path [String] location of the dictionary file, one word per line
# @param tree [Hash] root of the character tree (mutated)
def process(path, tree)
  File.open(path, 'r') do |file|
    file.each_line do |line|
      # Start at the root; descend one level per character, creating nodes on demand.
      node = tree
      line.each_char do |c|
        next if c == "\n" || c == "\r"
        node = (node[c] ||= {})
      end
      # Still at the root means the line had no characters to store.
      node[:end] = true unless node.equal?(tree)
    end
  end
end
52
+
53
# Path of the Marshal file holding the Chinese character tree, inside
# config/filter_word under Rails.root. A truthy +model+ prefixes the file
# name with "<model>_".
def hash_path(model = nil)
  file_name = model ? "#{model}_harmonious.hash" : 'harmonious.hash'
  File.join(Rails.root, 'config', 'filter_word', file_name)
end
60
+
61
# Path of the YAML file holding the English word list, inside
# config/filter_word under Rails.root. A truthy +model+ prefixes the file
# name with "<model>_".
def yaml_path(model = nil)
  file_name = model ? "#{model}_harmonious_english.yml" : 'harmonious_english.yml'
  File.join(Rails.root, 'config', 'filter_word', file_name)
end
@@ -0,0 +1,56 @@
1
+ # encoding: utf-8
2
+
3
+ require 'spec_helper'
4
+
5
# Examples for the module-level FilterWord API. harmonious_words, clean? and
# clean are each called with and without the optional second (model)
# argument; clean_word_basic is called with a single word.
describe FilterWord do
  describe 'local' do
    describe 'segment' do
      it 'should return harmonious word for sentence' do
        found = FilterWord.harmonious_words('戴秉国在中国')
        found.should == ['戴秉国']
      end

      it 'should return harmonious word for sentence under have model param' do
        found = FilterWord.harmonious_words('戴秉国在中国', 'user')
        found.should == ['戴秉国']
      end

      it 'should return english,url and chinese words' do
        found = FilterWord.harmonious_words('戴秉国 in china,watch cctv.com.let fuck it', 'user')
        found.should == ['戴秉国', 'fuck']
      end

      it 'should return english words under have model param' do
        found = FilterWord.harmonious_words('Gruepin', 'user')
        found.should == ['Gruepin']
      end
    end

    it 'should find harmonious chinese words' do
      verdict = FilterWord.clean?('李源潮在中国')
      verdict.should == false
    end

    it 'should find harmonious chinese words under have model param' do
      verdict = FilterWord.clean?('李源潮在中国', 'user')
      verdict.should == false
    end

    it 'should pass good words' do
      verdict = FilterWord.clean?('过去')
      verdict.should == true
    end

    it 'should pass good words under have model param' do
      verdict = FilterWord.clean?('过去', 'user')
      verdict.should == true
    end

    it 'should clean sentence by replace harmonious words by *' do
      masked = FilterWord.clean('戴秉国在中国')
      masked.should == '***在中国'
    end

    it 'should clean sentence by replace harmonious words by * under model param' do
      masked = FilterWord.clean('戴秉国在中国', 'user')
      masked.should == '***在中国'
    end

    it 'should replace harmonious with *' do
      masked = FilterWord.clean_word_basic('大米')
      masked.should == '**'
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
# Throwaway in-memory SQLite database for the model specs.
ActiveRecord::Base.establish_connection(adapter: "sqlite3", database: ":memory:")

# Single `posts` table with the three text-like columns the examples fill in.
ActiveRecord::Schema.define(version: 1) do
  create_table(:posts) do |table|
    table.string :title
    table.text :body
    table.text :note
  end
end
13
+
14
# Minimal model used by the specs: title and body go through the
# harmonious-word validation with the :post model, note is left unvalidated.
class Post < ActiveRecord::Base
  extend FilterWord::ModelAdditions

  validate_harmonious_of([:title, :body], :model => :post)
end
18
+
19
# Examples for the validation that FilterWord::ModelAdditions adds to Post.
describe FilterWord::ModelAdditions do
  # All three attributes carry the same sentence; Post only validates title and body.
  let(:post) { Post.create title:'戴秉国在中国',body:'戴秉国在中国',note:'戴秉国在中国' }

  describe 'use local' do
    it 'should validate for harmonious' do
      post.errors[:title].should == ['不能含有敏感词']
    end

    # Description corrected: this example asserts on body, not title.
    it 'should have error on body' do
      post.errors[:body].should == ['不能含有敏感词']
    end

    it 'should allow empty input value' do
      # The record without a title is the subject here. The previous version
      # created it but then asserted on `post`, so the empty-value case was
      # never exercised.
      # NOTE(review): assumes validate_harmonious_of adds no error for a nil
      # attribute - confirm against lib/filter_word/model_additions.rb.
      untitled = Post.create body:'戴秉国在中国',note:'戴秉国在中国'
      untitled.errors[:title].should == []
      untitled.errors[:body].should == ['不能含有敏感词']
    end

    it 'should filter! any harmonious words' do
      FilterWord.clean(post.body).should == '***在中国'
    end
  end
end
42
+
@@ -0,0 +1,21 @@
1
+ require 'rubygems'
2
+
3
+ require 'rails'
4
+ require 'active_model'
5
+ require 'active_record'
6
+ require 'filter_word'
7
+
8
# Console output settings for the suite: colored, documentation-style formatter.
RSpec.configure do |rspec|
  rspec.color_enabled = true
  rspec.formatter = 'documentation'
end
12
+
13
# Test stub for the two Rails accessors referenced by the specs' setup:
# `root` resolves to the parent of this file's directory (written as a
# "<dir>/../" string) and `env` is fixed to 'test'.
module Rails
  class << self
    def root
      spec_dir = File.dirname(__FILE__)
      File.join(spec_dir, '../')
    end

    def env
      'test'
    end
  end
end
metadata ADDED
@@ -0,0 +1,107 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: filter_word
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - xiaobo
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: "和谐检测用于检查输入是否包含中文或英文敏感词."
42
+ email:
43
+ - peterwillcn@gmail.com
44
+ executables:
45
+ - harmonious_rseg
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - ".gitignore"
50
+ - ".rspec"
51
+ - ".travis.yml"
52
+ - CHANGELOG
53
+ - MIT-LICENSE
54
+ - README.markdown
55
+ - Rakefile
56
+ - benchmark/benchmark.rb
57
+ - benchmark/text_test_100.txt
58
+ - benchmark/text_test_1000.txt
59
+ - benchmark/text_test_10000.txt
60
+ - bin/harmonious_rseg
61
+ - filter_word.gemspec
62
+ - lib/filter_word.rb
63
+ - lib/filter_word/app.rb
64
+ - lib/filter_word/engines/dict.rb
65
+ - lib/filter_word/engines/engine.rb
66
+ - lib/filter_word/engines/english.rb
67
+ - lib/filter_word/filters/conjunction.rb
68
+ - lib/filter_word/filters/fullwidth.rb
69
+ - lib/filter_word/filters/symbol.rb
70
+ - lib/filter_word/model_additions.rb
71
+ - lib/filter_word/railtie.rb
72
+ - lib/filter_word/rseg.rb
73
+ - lib/filter_word/version.rb
74
+ - lib/generators/filter_word/setup/setup_generator.rb
75
+ - lib/generators/filter_word/setup/templates/chinese_dictionary.txt
76
+ - lib/generators/filter_word/setup/templates/english_dictionary.txt
77
+ - lib/tasks/generate_dictionary.rake
78
+ - spec/filter_word_spec.rb
79
+ - spec/model_additions_spec.rb
80
+ - spec/spec_helper.rb
81
+ homepage: https://github.com/tian-xiaobo/filter_word
82
+ licenses: []
83
+ metadata: {}
84
+ post_install_message:
85
+ rdoc_options: []
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ requirements: []
99
+ rubyforge_project:
100
+ rubygems_version: 2.4.5.1
101
+ signing_key:
102
+ specification_version: 4
103
+ summary: filter any words that need to be harmonized
104
+ test_files:
105
+ - spec/filter_word_spec.rb
106
+ - spec/model_additions_spec.rb
107
+ - spec/spec_helper.rb