sensitive 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 8465c512261a28b318ee88e693310764e855d6ed94178c3906adf04cc2fe6242
4
+ data.tar.gz: 7643a13ceeb746fcfb3154f861052035edb98eede8ac5b1b5ca85a83e250e52a
5
+ SHA512:
6
+ metadata.gz: 02f551cda48c4e882444af23d377d75ecfa8f7de1eb82db800bc9ba51347074c3a2489c7e7915041c32dd50e475e626a34c019f90fae340126769e0bb74f225e
7
+ data.tar.gz: 9d8cd16bd20a40b6f67a6aeeec0f6c0e22d8ce64d32a203fe4c1a9efbd2fad86a7e9c0a1d7d934629b2a898acfd1875322413ddb16c215553c6d9e77a1256a9e
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ .idea
@@ -0,0 +1,146 @@
1
+ # This is the RuboCop configuration used to check this project's source code.
2
+
3
+ #inherit_from: .rubocop_todo.yml
4
+ require:
5
+ - rubocop/cop/internal_affairs
6
+ - rubocop-performance
7
+ - rubocop-rspec
8
+
9
+ AllCops:
10
+ Exclude:
11
+ - 'vendor/**/*'
12
+ - 'spec/fixtures/**/*'
13
+ - 'tmp/**/*'
14
+ - '.git/**/*'
15
+ TargetRubyVersion: 2.6
16
+
17
+ Naming/PredicateName:
18
+ # Method-defining macros for dynamically generated methods.
19
+ MethodDefinitionMacros:
20
+ - define_method
21
+ - define_singleton_method
22
+ - def_node_matcher
23
+ - def_node_search
24
+
25
+ Style/FormatStringToken:
26
+ # Because we parse a lot of source code from strings. Percent arrays
27
+ # look like unannotated format string tokens to this cop.
28
+ Exclude:
29
+ - spec/**/*
30
+
31
+ Style/IpAddresses:
32
+ # The test for this cop includes strings that would cause offenses
33
+ Exclude:
34
+ - spec/rubocop/cop/style/ip_addresses_spec.rb
35
+
36
+ Layout/EndOfLine:
37
+ EnforcedStyle: lf
38
+
39
+ Layout/ClassStructure:
40
+ Enabled: true
41
+ Categories:
42
+ module_inclusion:
43
+ - include
44
+ - prepend
45
+ - extend
46
+ ExpectedOrder:
47
+ - module_inclusion
48
+ - constants
49
+ - public_class_methods
50
+ - initializer
51
+ - instance_methods
52
+ - protected_methods
53
+ - private_methods
54
+
55
+ # Trailing white space is meaningful in code examples
56
+ Layout/TrailingWhitespace:
57
+ AllowInHeredoc: true
58
+
59
+ Lint/AmbiguousBlockAssociation:
60
+ Exclude:
61
+ - 'spec/**/*.rb'
62
+
63
+ Lint/InterpolationCheck:
64
+ Exclude:
65
+ - 'spec/**/*.rb'
66
+
67
+ Lint/UselessAccessModifier:
68
+ MethodCreatingMethods:
69
+ - 'def_matcher'
70
+ - 'def_node_matcher'
71
+
72
+ Lint/BooleanSymbol:
73
+ Enabled: false
74
+
75
+ Metrics/BlockLength:
76
+ Exclude:
77
+ - 'Rakefile'
78
+ - '**/*.rake'
79
+ - 'spec/**/*.rb'
80
+ - '**/*.gemspec'
81
+ - 'db/*.rb'
82
+ - 'Gemfile'
83
+
84
+ Metrics/ClassLength:
85
+ Exclude:
86
+ - lib/rubocop/config_obsoletion.rb
87
+
88
+ Metrics/ModuleLength:
89
+ Exclude:
90
+ - 'spec/**/*.rb'
91
+
92
+ Metrics/LineLength:
93
+ Max: 120
94
+ Exclude:
95
+ - 'db/*.rb'
96
+ - 'db/migrate/*.rb'
97
+ - 'config/*.rb'
98
+ - 'config/**/*.rb'
99
+ - 'spec/**/*.rb'
100
+ - 'bin/**'
101
+ - 'test/*.rb'
102
+
103
+ Metrics/MethodLength:
104
+ Max: 15
105
+ Exclude:
106
+ - 'db/*.rb'
107
+ - 'db/migrate/*.rb'
108
+ - 'bin/bundle'
109
+
110
+ RSpec/PredicateMatcher:
111
+ EnforcedStyle: explicit
112
+ Exclude:
113
+ - 'spec/models/*.rb'
114
+
115
+ RSpec/MessageSpies:
116
+ EnforcedStyle: receive
117
+
118
+ RSpec/NestedGroups:
119
+ Max: 7
120
+
121
+ # personal config
122
+ Style/AsciiComments:
123
+ Enabled: false
124
+
125
+ Style/Documentation:
126
+ Enabled: false
127
+
128
+ Style/ClassAndModuleChildren:
129
+ Exclude:
130
+ - 'test/**'
131
+ - 'app/controllers/users/*.rb'
132
+ - 'test/channels/application_cable/*.rb'
133
+
134
+ Metrics/CyclomaticComplexity:
135
+ Exclude:
136
+ - 'bin/bundle'
137
+ Metrics/AbcSize:
138
+ Max: 20
139
+ Exclude:
140
+ - 'db/migrate/*.rb'
141
+ - 'bin/bundle'
142
+
143
+ Metrics/PerceivedComplexity:
144
+ Exclude:
145
+ - 'bin/bundle'
146
+
@@ -0,0 +1,74 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ In the interest of fostering an open and welcoming environment, we as
6
+ contributors and maintainers pledge to making participation in our project and
7
+ our community a harassment-free experience for everyone, regardless of age, body
8
+ size, disability, ethnicity, gender identity and expression, level of experience,
9
+ nationality, personal appearance, race, religion, or sexual identity and
10
+ orientation.
11
+
12
+ ## Our Standards
13
+
14
+ Examples of behavior that contributes to creating a positive environment
15
+ include:
16
+
17
+ * Using welcoming and inclusive language
18
+ * Being respectful of differing viewpoints and experiences
19
+ * Gracefully accepting constructive criticism
20
+ * Focusing on what is best for the community
21
+ * Showing empathy towards other community members
22
+
23
+ Examples of unacceptable behavior by participants include:
24
+
25
+ * The use of sexualized language or imagery and unwelcome sexual attention or
26
+ advances
27
+ * Trolling, insulting/derogatory comments, and personal or political attacks
28
+ * Public or private harassment
29
+ * Publishing others' private information, such as a physical or electronic
30
+ address, without explicit permission
31
+ * Other conduct which could reasonably be considered inappropriate in a
32
+ professional setting
33
+
34
+ ## Our Responsibilities
35
+
36
+ Project maintainers are responsible for clarifying the standards of acceptable
37
+ behavior and are expected to take appropriate and fair corrective action in
38
+ response to any instances of unacceptable behavior.
39
+
40
+ Project maintainers have the right and responsibility to remove, edit, or
41
+ reject comments, commits, code, wiki edits, issues, and other contributions
42
+ that are not aligned to this Code of Conduct, or to ban temporarily or
43
+ permanently any contributor for other behaviors that they deem inappropriate,
44
+ threatening, offensive, or harmful.
45
+
46
+ ## Scope
47
+
48
+ This Code of Conduct applies both within project spaces and in public spaces
49
+ when an individual is representing the project or its community. Examples of
50
+ representing a project or community include using an official project e-mail
51
+ address, posting via an official social media account, or acting as an appointed
52
+ representative at an online or offline event. Representation of a project may be
53
+ further defined and clarified by project maintainers.
54
+
55
+ ## Enforcement
56
+
57
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
58
+ reported by contacting the project team at luolinae86@gmail.com. All
59
+ complaints will be reviewed and investigated and will result in a response that
60
+ is deemed necessary and appropriate to the circumstances. The project team is
61
+ obligated to maintain confidentiality with regard to the reporter of an incident.
62
+ Further details of specific enforcement policies may be posted separately.
63
+
64
+ Project maintainers who do not follow or enforce the Code of Conduct in good
65
+ faith may face temporary or permanent repercussions as determined by other
66
+ members of the project's leadership.
67
+
68
+ ## Attribution
69
+
70
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71
+ available at [https://contributor-covenant.org/version/1/4][version]
72
+
73
+ [homepage]: https://contributor-covenant.org
74
+ [version]: https://contributor-covenant.org/version/1/4/
data/DFA.jpg ADDED
Binary file
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in sensitive.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2020 luolinae86
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 luolin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,147 @@
1
+ # 敏感词过滤
2
+
3
+ 基于 `DFA` 算法,用 `Ruby` 实现敏感词过滤,具备以下功能:
4
+ - 支持加载系统自带敏感词通用库(广告,政治,色情等等)
5
+ - 支持导入敏感词文件
6
+ - 支持添加单个敏感词
7
+ - 敏感词检查输出
8
+
9
+ ## 安装
10
+
11
+ `Gemfile` 文件增加下面一行:
12
+
13
+ ```ruby
14
+ gem 'sensitive'
15
+ ```
16
+
17
+ 然后执行:
18
+
19
+ $ bundle install
20
+
21
+ 或者手工安装:
22
+
23
+ $ gem install sensitive
24
+
25
+ ## 使用方法
26
+
27
+ #### 选择是否加载 `Gem` 自带敏感词库
28
+
29
+ `Gem` 里面提供了 **一万多条** 敏感词,涉及
30
+ - 广告
31
+ - 政治
32
+ - 色情
33
+ - 其它
34
+
35
+ 你可以根据自己的需要选择是否加载,加载方法:
36
+
37
+ ```ruby
38
+ Sensitive.load_default
39
+ ```
40
+
41
+ #### 加载自己的敏感词文件
42
+
43
+ 你也可以加载自己的一个或多个敏感词文件,文件格式支持 `txt`,每个敏感词独占一行
44
+
45
+ ```ruby
46
+ Sensitive.load_file(file_path)
47
+ ```
48
+
49
+ #### 动态添加单个敏感词
50
+
51
+ ```ruby
52
+ Sensitive.add_word('赌博')
53
+ ```
54
+
55
+ #### 清空敏感词
56
+
57
+ ```ruby
58
+ Sensitive.empty!
59
+ ```
60
+
61
+ #### 敏感词过滤
62
+
63
+ ```ruby
64
+ # filter 方法传入需要检测的敏感字符串,如果字符串中有敏感词,则返回,如果没有,则返回空
65
+ Sensitive.filter('不要赌博') #=> '赌博'
66
+ ```
67
+
68
+ ```ruby
69
+ irb(main):018:0> puts Sensitive.words
70
+ {}
71
+ irb(main):019:0> Sensitive.add_word('敏感词')
72
+ => ["敏", "感", "词"]
73
+ irb(main):020:0> puts Sensitive.words
74
+ {"敏"=>{:is_end=>false, :value=>{"感"=>{:is_end=>false, :value=>{"词"=>{:is_end=>true, :value=>{}}}}}}}
75
+ irb(main):021:0> puts Sensitive.filter('检测敏感词的算法')
76
+ 敏感词
77
+ irb(main):022:0> puts Sensitive.filter('色情信息')
78
+ irb(main):023:0> Sensitive.load_default
79
+ irb(main):024:0> puts Sensitive.filter('色情信息')
80
+ 色情信息
81
+ irb(main):025:0> Sensitive.empty
82
+ => {}
83
+ irb(main):026:0> puts Sensitive.words
84
+ {}
85
+ ```
86
+
87
+ ## DFA 算法
88
+ `Gem` 中,用 `Ruby` 实现了 `DFA` 算法
89
+ > DFA(Deterministic Finite Automaton),即确定有穷自动机。其原理为:有一个有限状态集合和一些从一个状态通向另一个状态的边,每条边上标记有一个符号,其中一个状态是初态,某些状态是终态。但不同于不确定的有限自动机,DFA中不会有从同一状态出发的两条边标志有相同的符号。
90
+
91
+ 因此,`DFA` 算法非常适合用来做敏感词过滤
92
+
93
+ #### DFA算法解析
94
+ 假如敏感词库里有:**赌博网站** 和 **赌博论坛** 这两个敏感词,首先我们要建立如下的结构
95
+
96
+ ![DFA](./DFA.jpg)
97
+
98
+ Ruby 算法会生成一棵如下内容的 `hash` 树
99
+ ```ruby
100
+ {
101
+ "赌"=>{
102
+ :is_end=>false,
103
+ :value=>{
104
+ "博"=>{
105
+ :is_end=>false,
106
+ :value=> {
107
+ "网"=>{
108
+ :is_end=>false,
109
+ :value=>{
110
+ "站"=>{
111
+ :is_end=>true,
112
+ :value=>{}
113
+ }
114
+ }
115
+ },
116
+ "论"=>{
117
+ :is_end=>false,
118
+ :value=>{
119
+ "坛"=>{
120
+ :is_end=>true,
121
+ :value=>{}
122
+ }
123
+ }
124
+ }
125
+ }
126
+ }
127
+ }
128
+ }
129
+ }
130
+ ```
131
+
132
+ #### `DFA` 运行逻辑:
133
+
134
+ 比如检测敏感词 '赌博网站'
135
+
136
+ 首先切割成一个个字: 赌、博、网、站
137
+
138
+ 程序拿着 '赌' 为 `hash key` 在 `DFA hash` 树中查询,如果有就返回 '赌' 下面的子树,无则不是敏感词
139
+
140
+ 接上面子树, 匹配第二个字 '博',有就返回 '博' 下面的子树,无则不是敏感词。以此类推...
141
+
142
+ 最终遇到 `is_end = true` 的节点,表明已匹配到一个完整的敏感词,搜索结束
143
+
144
+ ## License
145
+
146
+ [MIT License](https://opensource.org/licenses/MIT).
147
+
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+ task :default => :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "sensitive"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)