dejunk 0.1.0 → 0.2.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 11a02210b2135091e658c20aea3611ccb4b22627
4
- data.tar.gz: 6bd647976a92dff85ade9fca15f45848ed577118
2
+ SHA256:
3
+ metadata.gz: c544a625c931feb25e33682bb9a2c7ad243b57acaab1d1643d8ad410f7cbec69
4
+ data.tar.gz: dbae0776455be08aa508c2699bdc9c4041d23fc7d627a8fb99a985a05f5c16b7
5
5
  SHA512:
6
- metadata.gz: b6f54d2dd4a8ec45dd8209d34ee6bbc1eb6ba76819f25720cec74f3a0af4233eb71c4059dfe58b33ec668bec0a19a616eadf6a822648485a714b610254d338dc
7
- data.tar.gz: 5e92d10677561fa1b2b3afdf4e6ba582098266c9a51a92053e7cc80eb13107a54cec524c1921fbf4ad46937b0f44e37e00b88533d3d5992fec08d3c7b07141e7
6
+ metadata.gz: 48728d90252fa4b29bb38d165a0920325c1d4bd13a54f70be637fbd36b572df49458446bf4caaaefb5e7289ca1a5ec990b46d917734f10be8e2783edaae7f4ff
7
+ data.tar.gz: ca738d822697a164101f3e832e94a95cdf98e59180930bb0bd48c467e7999f0ad47da014b13e59ba36ce0d50326f17201eeb1b3a9357ab2469643ae0fd8fc5c4
data/.gitignore CHANGED
@@ -7,3 +7,5 @@
7
7
  /pkg/
8
8
  /spec/reports/
9
9
  /tmp/
10
+ TAGS
11
+ .DS_Store
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # Dejunk
2
2
 
3
+ [![CircleCI](https://circleci.com/gh/academia-edu/dejunk.svg?style=svg)](https://circleci.com/gh/academia-edu/dejunk)
4
+
3
5
  Detect keyboard mashing and other junk in your data.
4
6
 
5
7
  For example, if you allow user-entered tags, but want to hide bad ones. Or if
@@ -51,7 +53,9 @@ $ Dejunk.is_junk?('Hi', whitelist_regexes: [/\Ahi\z/i])
51
53
 
52
54
  Returns a reason when junk is detected for aid in debugging. Optional parameters
53
55
  are `min_alnum_chars` (defaults to 3), and `whitelist_strings` and
54
- `whitelist_regexes` (both default to none).
56
+ `whitelist_regexes` (both default to none, but you'll likely want some domain-specific
57
+ strings here, which you might discover by checking against a sample from your existing
58
+ corpus).
55
59
 
56
60
  ## Development
57
61
 
@@ -0,0 +1,7 @@
1
+ machine:
2
+ ruby:
3
+ version: 2.2.3
4
+
5
+ dependencies:
6
+ pre:
7
+ - gem install bundler -v 1.10.6
@@ -131,11 +131,11 @@ module Dejunk
131
131
  return 0 unless bigrams.present?
132
132
 
133
133
  prob_bigrams_given_mashing = bigrams.
134
- map { |bigram| BigDecimal.new(mashing_probability(bigram).to_s) }.
134
+ map { |bigram| BigDecimal(mashing_probability(bigram).to_s) }.
135
135
  inject(&:*)
136
136
 
137
137
  prob_bigrams_given_corpus = bigrams.
138
- map { |bigram| BigDecimal.new(corpus_probability(bigram).to_s) }.
138
+ map { |bigram| BigDecimal(corpus_probability(bigram).to_s) }.
139
139
  inject(&:*)
140
140
 
141
141
  numerator = prob_bigrams_given_mashing * apriori_probability_of_mashing
@@ -1,3 +1,3 @@
1
1
  module Dejunk
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dejunk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Judd
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-12-31 00:00:00.000000000 Z
11
+ date: 2019-04-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -82,6 +82,7 @@ files:
82
82
  - Rakefile
83
83
  - bin/console
84
84
  - bin/setup
85
+ - circle.yml
85
86
  - dejunk.gemspec
86
87
  - lib/dejunk.rb
87
88
  - lib/dejunk/version.rb
@@ -104,8 +105,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
105
  - !ruby/object:Gem::Version
105
106
  version: '0'
106
107
  requirements: []
107
- rubyforge_project:
108
- rubygems_version: 2.4.5.1
108
+ rubygems_version: 3.0.1
109
109
  signing_key:
110
110
  specification_version: 4
111
111
  summary: Detect keyboard mashing and other junk in your data.