dejunk 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +2 -0
- data/README.md +5 -1
- data/circle.yml +7 -0
- data/lib/dejunk.rb +2 -2
- data/lib/dejunk/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: c544a625c931feb25e33682bb9a2c7ad243b57acaab1d1643d8ad410f7cbec69
|
4
|
+
data.tar.gz: dbae0776455be08aa508c2699bdc9c4041d23fc7d627a8fb99a985a05f5c16b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 48728d90252fa4b29bb38d165a0920325c1d4bd13a54f70be637fbd36b572df49458446bf4caaaefb5e7289ca1a5ec990b46d917734f10be8e2783edaae7f4ff
|
7
|
+
data.tar.gz: ca738d822697a164101f3e832e94a95cdf98e59180930bb0bd48c467e7999f0ad47da014b13e59ba36ce0d50326f17201eeb1b3a9357ab2469643ae0fd8fc5c4
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Dejunk
|
2
2
|
|
3
|
+
[![CircleCI](https://circleci.com/gh/academia-edu/dejunk.svg?style=svg)](https://circleci.com/gh/academia-edu/dejunk)
|
4
|
+
|
3
5
|
Detect keyboard mashing and other junk in your data.
|
4
6
|
|
5
7
|
For example, if you allow user-entered tags, but want to hide bad ones. Or if
|
@@ -51,7 +53,9 @@ $ Dejunk.is_junk?('Hi', whitelist_regexes: [/\Ahi\z/i])
|
|
51
53
|
|
52
54
|
Returns a reason when junk is detected for aid in debugging. Optional parameters
|
53
55
|
are `min_alnum_chars` (defaults to 3), and `whitelist_strings` and
|
54
|
-
`whitelist_regexes` (both default to none).
|
56
|
+
`whitelist_regexes` (both default to none, but you'll likely want some domain-specific
|
57
|
+
strings here, which you might discover by checking against a sample from your existing
|
58
|
+
corpus).
|
55
59
|
|
56
60
|
## Development
|
57
61
|
|
data/circle.yml
ADDED
data/lib/dejunk.rb
CHANGED
@@ -131,11 +131,11 @@ module Dejunk
|
|
131
131
|
return 0 unless bigrams.present?
|
132
132
|
|
133
133
|
prob_bigrams_given_mashing = bigrams.
|
134
|
-
map { |bigram| BigDecimal.new(mashing_probability(bigram).to_s) }.
|
134
|
+
map { |bigram| BigDecimal(mashing_probability(bigram).to_s) }.
|
135
135
|
inject(&:*)
|
136
136
|
|
137
137
|
prob_bigrams_given_corpus = bigrams.
|
138
|
-
map { |bigram| BigDecimal.new(corpus_probability(bigram).to_s) }.
|
138
|
+
map { |bigram| BigDecimal(corpus_probability(bigram).to_s) }.
|
139
139
|
inject(&:*)
|
140
140
|
|
141
141
|
numerator = prob_bigrams_given_mashing * apriori_probability_of_mashing
|
data/lib/dejunk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dejunk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Judd
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -82,6 +82,7 @@ files:
|
|
82
82
|
- Rakefile
|
83
83
|
- bin/console
|
84
84
|
- bin/setup
|
85
|
+
- circle.yml
|
85
86
|
- dejunk.gemspec
|
86
87
|
- lib/dejunk.rb
|
87
88
|
- lib/dejunk/version.rb
|
@@ -104,8 +105,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
105
|
- !ruby/object:Gem::Version
|
105
106
|
version: '0'
|
106
107
|
requirements: []
|
107
|
-
|
108
|
-
rubygems_version: 2.4.5.1
|
108
|
+
rubygems_version: 3.0.1
|
109
109
|
signing_key:
|
110
110
|
specification_version: 4
|
111
111
|
summary: Detect keyboard mashing and other junk in your data.
|