dejunk 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 11a02210b2135091e658c20aea3611ccb4b22627
4
- data.tar.gz: 6bd647976a92dff85ade9fca15f45848ed577118
2
+ SHA256:
3
+ metadata.gz: c544a625c931feb25e33682bb9a2c7ad243b57acaab1d1643d8ad410f7cbec69
4
+ data.tar.gz: dbae0776455be08aa508c2699bdc9c4041d23fc7d627a8fb99a985a05f5c16b7
5
5
  SHA512:
6
- metadata.gz: b6f54d2dd4a8ec45dd8209d34ee6bbc1eb6ba76819f25720cec74f3a0af4233eb71c4059dfe58b33ec668bec0a19a616eadf6a822648485a714b610254d338dc
7
- data.tar.gz: 5e92d10677561fa1b2b3afdf4e6ba582098266c9a51a92053e7cc80eb13107a54cec524c1921fbf4ad46937b0f44e37e00b88533d3d5992fec08d3c7b07141e7
6
+ metadata.gz: 48728d90252fa4b29bb38d165a0920325c1d4bd13a54f70be637fbd36b572df49458446bf4caaaefb5e7289ca1a5ec990b46d917734f10be8e2783edaae7f4ff
7
+ data.tar.gz: ca738d822697a164101f3e832e94a95cdf98e59180930bb0bd48c467e7999f0ad47da014b13e59ba36ce0d50326f17201eeb1b3a9357ab2469643ae0fd8fc5c4
data/.gitignore CHANGED
@@ -7,3 +7,5 @@
7
7
  /pkg/
8
8
  /spec/reports/
9
9
  /tmp/
10
+ TAGS
11
+ .DS_Store
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # Dejunk
2
2
 
3
+ [![CircleCI](https://circleci.com/gh/academia-edu/dejunk.svg?style=svg)](https://circleci.com/gh/academia-edu/dejunk)
4
+
3
5
  Detect keyboard mashing and other junk in your data.
4
6
 
5
7
  For example, if you allow user-entered tags, but want to hide bad ones. Or if
@@ -51,7 +53,9 @@ $ Dejunk.is_junk?('Hi', whitelist_regexes: [/\Ahi\z/i])
51
53
 
52
54
  Returns a reason when junk is detected for aid in debugging. Optional parameters
53
55
  are `min_alnum_chars` (defaults to 3), and `whitelist_strings` and
54
- `whitelist_regexes` (both default to none).
56
+ `whitelist_regexes` (both default to none, but you'll likely want some domain-specific
57
+ strings here, which you might discover by checking against a sample from your existing
58
+ corpus).
55
59
 
56
60
  ## Development
57
61
 
@@ -0,0 +1,7 @@
1
+ machine:
2
+ ruby:
3
+ version: 2.2.3
4
+
5
+ dependencies:
6
+ pre:
7
+ - gem install bundler -v 1.10.6
@@ -131,11 +131,11 @@ module Dejunk
131
131
  return 0 unless bigrams.present?
132
132
 
133
133
  prob_bigrams_given_mashing = bigrams.
134
- map { |bigram| BigDecimal.new(mashing_probability(bigram).to_s) }.
134
+ map { |bigram| BigDecimal(mashing_probability(bigram).to_s) }.
135
135
  inject(&:*)
136
136
 
137
137
  prob_bigrams_given_corpus = bigrams.
138
- map { |bigram| BigDecimal.new(corpus_probability(bigram).to_s) }.
138
+ map { |bigram| BigDecimal(corpus_probability(bigram).to_s) }.
139
139
  inject(&:*)
140
140
 
141
141
  numerator = prob_bigrams_given_mashing * apriori_probability_of_mashing
@@ -1,3 +1,3 @@
1
1
  module Dejunk
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dejunk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Judd
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-12-31 00:00:00.000000000 Z
11
+ date: 2019-04-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -82,6 +82,7 @@ files:
82
82
  - Rakefile
83
83
  - bin/console
84
84
  - bin/setup
85
+ - circle.yml
85
86
  - dejunk.gemspec
86
87
  - lib/dejunk.rb
87
88
  - lib/dejunk/version.rb
@@ -104,8 +105,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
105
  - !ruby/object:Gem::Version
105
106
  version: '0'
106
107
  requirements: []
107
- rubyforge_project:
108
- rubygems_version: 2.4.5.1
108
+ rubygems_version: 3.0.1
109
109
  signing_key:
110
110
  specification_version: 4
111
111
  summary: Detect keyboard mashing and other junk in your data.