cf-swearjar 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
1
+ require 'yaml'
2
+ require 'fuzzy_hash'
3
+ require 'bloomfilter'
4
+
5
class Swearjar
  # Standalone word tester backed by a FuzzyHash built from a YAML dictionary.
  #
  # The dictionary may contain two optional top-level maps:
  #   regex:  pattern source => value stored under the compiled Regexp
  #   simple: literal word   => value stored under the word itself
  class Tester

    # Loads the dictionary at +config_file+ into a new FuzzyHash.
    #
    # @param config_file [String] path to the YAML dictionary
    def initialize(config_file)
      # The path must be handed to YAML.load_file; an empty document parses
      # to a falsy value, so fall back to an empty dictionary.
      data = YAML.load_file(config_file) || {}

      @tester = FuzzyHash.new

      (data['regex'] || {}).each do |pattern, type|
        @tester[Regexp.new(pattern)] = type
      end

      (data['simple'] || {}).each do |test, type|
        @tester[test] = type
      end
    end

    # Extracts the words (runs of ASCII letters and hyphens) from +string+.
    #
    # @param string [#to_s] text to split into words
    # @yield [word] each word, when a block is given
    # @return [Array<String>, String] the words when no block is given,
    #   otherwise whatever String#scan returns for the block form
    def scan(string, &block)
      # Inside a character class "\b" means backspace, not a word boundary,
      # so the word characters are spelled out explicitly.
      string.to_s.scan(/\b[a-zA-Z-]+\b/, &block)
    end

    # Tells whether any word of +string+ has an entry in the dictionary.
    # Words are downcased before the lookup.
    #
    # @param string [#to_s] text to check
    # @return [Boolean]
    def profane?(string)
      scan(string) { |word| return true if @tester[word.downcase] }
      false
    end

  end
end
@@ -0,0 +1,3 @@
1
class Swearjar
  # Release version of the gem. Frozen so the shared constant string cannot
  # be mutated in place by callers.
  VERSION = '1.0.1'.freeze
end
data/lib/swearjar.rb ADDED
@@ -0,0 +1,63 @@
1
+ require 'yaml'
2
+ require 'fuzzy_hash'
3
+
4
# Profanity detector: flags, tallies and censors words listed in a YAML
# dictionary.
#
# A dictionary file may contain two optional top-level maps:
#   regex:  pattern source => categories, stored in the FuzzyHash (+tester+)
#   simple: word           => categories, stored in a plain Hash (+hash+)
class Swearjar

  # Builds an instance using the default language of +from_language+.
  def self.default
    from_language
  end

  # Builds an instance from config/<language>.yml next to this file.
  #
  # @param language [String] dictionary name without the .yml extension
  def self.from_language(language = 'en')
    new(File.join(File.dirname(__FILE__), 'config', "#{language}.yml"))
  end

  # NOTE(review): the :hash reader shadows Object#hash, so instances cannot be
  # used as Hash keys. It is kept because it is part of the public interface.
  attr_reader :tester, :hash

  # @param file [String, nil] optional dictionary to load immediately
  def initialize(file = nil)
    @tester = FuzzyHash.new
    @hash = {}
    load_file(file) if file
  end

  # Merges the dictionary at +file+ into this instance. May be called several
  # times; later files add to (or overwrite) earlier entries.
  #
  # @param file [String] path to a YAML dictionary
  def load_file(file)
    # An empty YAML document parses to a falsy value rather than a Hash.
    data = YAML.load_file(file) || {}

    (data['regex'] || {}).each do |pattern, type|
      @tester[Regexp.new(pattern)] = type
    end

    (data['simple'] || {}).each do |test, type|
      @hash[test] = type
    end
  end

  # Walks +string+ and reports every word together with its dictionary entry
  # (nil when the word is not listed), followed by the single result of the
  # fuzzy matcher when it finds one.
  #
  # @param string [#to_s] text to inspect
  # @yield [word, test] the matched text and its categories (or nil)
  # @return [Enumerator] when called without a block
  def scan(string, &block)
    return enum_for(:scan, string) unless block

    each_hit(string) { |word, test, _whole_word| block.call(word, test) }
  end

  # @param string [#to_s] text to check
  # @return [Boolean] true when any scanned item has a dictionary entry
  def profane?(string)
    scan(string) { |_word, test| return true unless test.nil? }
    false
  end

  # Counts how many hits fall into each category.
  #
  # @param string [#to_s] text to analyse
  # @return [Hash] category => number of hits
  def scorecard(string)
    tally = {}
    scan(string) do |_word, test|
      next unless test

      # Array() tolerates a dictionary entry that lists a bare category
      # instead of a list of categories.
      Array(test).each { |type| tally[type] = tally.fetch(type, 0) + 1 }
    end
    tally
  end

  # Returns a copy of +string+ with every hit masked. By default each
  # non-whitespace character of a hit becomes '*'; when a block is given its
  # return value for the hit is used instead.
  #
  # @param string [#to_s] text to censor
  # @yield [word] optional custom replacement for a hit
  # @return [String] the censored copy
  def censor(string)
    text = string.to_s
    censored_string = text.dup
    each_hit(text) do |word, test, whole_word|
      next unless test

      replacement = block_given? ? yield(word) : word.gsub(/\S/, '*')
      # Dictionary words are replaced only where they stand as whole words,
      # so a listed word embedded in a longer, harmless word is left alone.
      # Fuzzy matches keep plain substring replacement because their
      # boundaries are decided by the dictionary's own patterns.
      pattern = whole_word ? /\b#{Regexp.escape(word)}\b/ : word
      # Block form keeps backslashes in the replacement literal.
      censored_string.gsub!(pattern) { replacement }
    end
    censored_string
  end

  private

  # Yields (text, categories, whole_word) for every scanned word and then for
  # the fuzzy matcher's result, if any. +whole_word+ is true for items that
  # came from the word scan and false for the fuzzy match.
  def each_hit(string)
    text = string.to_s
    text.scan(/\b[a-zA-Z-]+\b/) do |word|
      key = word.downcase
      # Fall back to the word with a trailing "s"/"es" removed so simple
      # plurals hit the singular entry.
      yield word, hash[key] || hash[key.sub(/e?s\z/, '')], true
    end

    match = tester.match_with_result(text)
    yield match.last, match.first, false if match
  end

end
@@ -0,0 +1,2 @@
1
+ simple:
2
+ "python": ["sexual"]
data/spec/spec.opts ADDED
@@ -0,0 +1,7 @@
1
+ --colour
2
+ --format
3
+ specdoc
4
+ --loadby
5
+ mtime
6
+ --reverse
7
+ --backtrace
@@ -0,0 +1 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'swearjar'))
@@ -0,0 +1,48 @@
1
+ require 'spec_helper'
2
+
3
# Behavioural examples for Swearjar, run against the bundled default
# dictionary unless an example loads its own.
describe Swearjar do

  # Every example starts from a freshly loaded default instance.
  before(:each) do
    @jar = Swearjar.default
  end

  it "should detect dirty words" do
    @jar.profane?('fuck you jim henson').should be_true
  end

  it "should detect dirty words regardless of case" do
    @jar.profane?('FuCk you jim henson').should be_true
  end

  it "should not detect non-dirty words" do
    @jar.profane?('i love you jim henson').should be_false
  end

  it "should give us a scorecard" do
    @jar.scorecard('fuck you jim henson').should == {'sexual'=>1}
  end

  it "should detect multiword" do
    @jar.scorecard('jim henson has a hard on').should == {'sexual'=>1}
  end

  it "should detect multiword plurals" do
    @jar.scorecard('jim henson has a hard ons').should == {'sexual'=>1}
  end

  it "should detect simple dirty plurals" do
    @jar.profane?('jim henson had two dicks').should be_true
    @jar.profane?('jim henson has two asses').should be_true
  end

  it "should censor a string" do
    original = 'jim henson has a massive hard on he is gonna use to fuck everybody'
    expected = 'jim henson has a massive **** ** he is gonna use to **** everybody'
    @jar.censor(original).should == expected
  end

  it "should not do much when given a non-string" do
    @jar.profane?(nil).should be_false
  end

  it "should allow you to load a new yaml file" do
    # Starts empty so only the fixture dictionary is in play.
    custom_jar = Swearjar.new
    custom_jar.load_file(File.expand_path('../data/swear.yml', __FILE__))
    custom_jar.censor("Python is the best language!").should == "****** is the best language!"
  end

end
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cf-swearjar
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Joshua Hull
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-08 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: fuzzyhash
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.0.11
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.0.11
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 0.8.7
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 0.8.7
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 1.3.0
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 1.3.0
62
+ description: Put another nickel in the swearjar. Simple profanity detection with content
63
+ analysis.
64
+ email: joshbuddy@gmail.com
65
+ executables: []
66
+ extensions: []
67
+ extra_rdoc_files:
68
+ - README.rdoc
69
+ files:
70
+ - .gitignore
71
+ - Gemfile
72
+ - README.rdoc
73
+ - Rakefile
74
+ - cf-swearjar.gemspec
75
+ - lib/config/en.yml
76
+ - lib/swearjar.rb
77
+ - lib/swearjar/tester.rb
78
+ - lib/swearjar/version.rb
79
+ - spec/data/swear.yml
80
+ - spec/spec.opts
81
+ - spec/spec_helper.rb
82
+ - spec/swearjar_spec.rb
83
+ homepage: http://github.com/joshbuddy/swearjar
84
+ licenses: []
85
+ post_install_message:
86
+ rdoc_options:
87
+ - --charset=UTF-8
88
+ require_paths:
89
+ - lib
90
+ required_ruby_version: !ruby/object:Gem::Requirement
91
+ none: false
92
+ requirements:
93
+ - - ! '>='
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ required_rubygems_version: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubyforge_project: swearjar
104
+ rubygems_version: 1.8.24
105
+ signing_key:
106
+ specification_version: 3
107
+ summary: Put another nickel in the swearjar. Simple profanity detection with content
108
+ analysis
109
+ test_files:
110
+ - spec/data/swear.yml
111
+ - spec/spec.opts
112
+ - spec/spec_helper.rb
113
+ - spec/swearjar_spec.rb