dejunk 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +15 -0
- data/dejunk.gemspec +2 -3
- data/lib/dejunk/version.rb +1 -1
- data/lib/dejunk.rb +11 -4
- metadata +10 -28
- data/circle.yml +0 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 78bada0220a075c407f09e81eeb3602b669f9c64a6b0107bf26764c2721c1f0c
|
|
4
|
+
data.tar.gz: 189aa6798ca085cee985df90a28c7efa42c57c0574ef6652382992e0ef0ff41c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 719a2c5bcc2f9a9c201f8856fbd5a219fe7b1b497c1c585d3c49c6104d4b37099fef3aef7635dc2fd5b054c1ae25fbd91c98b9bf569e1293fb9ee2c68049a10e
|
|
7
|
+
data.tar.gz: 0a0a75d55d940cd8e757a458385ab603541150a1a94e42532c9628a26b46be1b198220d781fddcb9d4d3da59ced552d7a8b056c7934c7192437420288298110a
|
data/dejunk.gemspec
CHANGED
|
@@ -6,7 +6,7 @@ require 'dejunk/version'
|
|
|
6
6
|
Gem::Specification.new do |spec|
|
|
7
7
|
spec.name = "dejunk"
|
|
8
8
|
spec.version = Dejunk::VERSION
|
|
9
|
-
spec.required_ruby_version = '
|
|
9
|
+
spec.required_ruby_version = '>= 3.3'
|
|
10
10
|
spec.authors = ["David Judd"]
|
|
11
11
|
spec.email = ["david@academia.edu"]
|
|
12
12
|
|
|
@@ -20,7 +20,6 @@ Gem::Specification.new do |spec|
|
|
|
20
20
|
|
|
21
21
|
spec.add_dependency 'activesupport'
|
|
22
22
|
|
|
23
|
-
spec.add_development_dependency "bundler", "~> 2.0"
|
|
24
|
-
spec.add_development_dependency "rake", "~> 12.3.3"
|
|
23
|
+
spec.add_development_dependency "rake"
|
|
25
24
|
spec.add_development_dependency "rspec"
|
|
26
25
|
end
|
data/lib/dejunk/version.rb
CHANGED
data/lib/dejunk.rb
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
require "
|
|
2
|
-
require "yaml"
|
|
1
|
+
require "active_support/core_ext/object/blank"
|
|
3
2
|
require "active_support/core_ext/string"
|
|
3
|
+
require "bigdecimal"
|
|
4
|
+
require "yaml"
|
|
5
|
+
|
|
6
|
+
require "dejunk/version"
|
|
4
7
|
|
|
5
8
|
module Dejunk
|
|
6
9
|
extend self
|
|
@@ -113,7 +116,7 @@ module Dejunk
|
|
|
113
116
|
string.
|
|
114
117
|
chars.
|
|
115
118
|
zip(string.chars[1..-1]).
|
|
116
|
-
map { |c1,c2| "#{c1.
|
|
119
|
+
map { |c1,c2| "#{c1.downcase}#{c2.downcase}" if c1 && c2 }.
|
|
117
120
|
compact.
|
|
118
121
|
map { |bigram| bigram.gsub(/[0-9]/, '0'.freeze) }.
|
|
119
122
|
map { |bigram| bigram.gsub(/[[:space:]]/, ' '.freeze) }
|
|
@@ -144,8 +147,12 @@ module Dejunk
|
|
|
144
147
|
end
|
|
145
148
|
|
|
146
149
|
def normalize_for_comparison(string)
|
|
150
|
+
# This mirrors what mb_chars did, assuming that non-UTF-8 encoded strings
|
|
151
|
+
# are actually UTF-8 in disguise. It's unclear whether this is necessary,
|
|
152
|
+
# but we left it in to avoid having to figure this out.
|
|
153
|
+
string = string.dup.force_encoding(Encoding::UTF_8) if string.encoding != Encoding::UTF_8
|
|
154
|
+
|
|
147
155
|
string.
|
|
148
|
-
mb_chars.
|
|
149
156
|
unicode_normalize(:nfkd).
|
|
150
157
|
gsub(/\p{Mn}+/, ''.freeze).
|
|
151
158
|
gsub(/[^[:alnum:]]+/, ''.freeze).
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: dejunk
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- David Judd
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: exe
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: activesupport
|
|
@@ -24,34 +23,20 @@ dependencies:
|
|
|
24
23
|
- - ">="
|
|
25
24
|
- !ruby/object:Gem::Version
|
|
26
25
|
version: '0'
|
|
27
|
-
- !ruby/object:Gem::Dependency
|
|
28
|
-
name: bundler
|
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
|
30
|
-
requirements:
|
|
31
|
-
- - "~>"
|
|
32
|
-
- !ruby/object:Gem::Version
|
|
33
|
-
version: '2.0'
|
|
34
|
-
type: :development
|
|
35
|
-
prerelease: false
|
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
-
requirements:
|
|
38
|
-
- - "~>"
|
|
39
|
-
- !ruby/object:Gem::Version
|
|
40
|
-
version: '2.0'
|
|
41
26
|
- !ruby/object:Gem::Dependency
|
|
42
27
|
name: rake
|
|
43
28
|
requirement: !ruby/object:Gem::Requirement
|
|
44
29
|
requirements:
|
|
45
|
-
- - "~>"
|
|
30
|
+
- - ">="
|
|
46
31
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: 12.3.3
|
|
32
|
+
version: '0'
|
|
48
33
|
type: :development
|
|
49
34
|
prerelease: false
|
|
50
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
36
|
requirements:
|
|
52
|
-
- - "~>"
|
|
37
|
+
- - ">="
|
|
53
38
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: 12.3.3
|
|
39
|
+
version: '0'
|
|
55
40
|
- !ruby/object:Gem::Dependency
|
|
56
41
|
name: rspec
|
|
57
42
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -66,13 +51,13 @@ dependencies:
|
|
|
66
51
|
- - ">="
|
|
67
52
|
- !ruby/object:Gem::Version
|
|
68
53
|
version: '0'
|
|
69
|
-
description:
|
|
70
54
|
email:
|
|
71
55
|
- david@academia.edu
|
|
72
56
|
executables: []
|
|
73
57
|
extensions: []
|
|
74
58
|
extra_rdoc_files: []
|
|
75
59
|
files:
|
|
60
|
+
- ".circleci/config.yml"
|
|
76
61
|
- ".gitignore"
|
|
77
62
|
- ".rspec"
|
|
78
63
|
- ".travis.yml"
|
|
@@ -82,7 +67,6 @@ files:
|
|
|
82
67
|
- Rakefile
|
|
83
68
|
- bin/console
|
|
84
69
|
- bin/setup
|
|
85
|
-
- circle.yml
|
|
86
70
|
- dejunk.gemspec
|
|
87
71
|
- lib/dejunk.rb
|
|
88
72
|
- lib/dejunk/version.rb
|
|
@@ -90,23 +74,21 @@ files:
|
|
|
90
74
|
homepage: https://github.com/academia-edu/dejunk
|
|
91
75
|
licenses: []
|
|
92
76
|
metadata: {}
|
|
93
|
-
post_install_message:
|
|
94
77
|
rdoc_options: []
|
|
95
78
|
require_paths:
|
|
96
79
|
- lib
|
|
97
80
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
98
81
|
requirements:
|
|
99
|
-
- - "
|
|
82
|
+
- - ">="
|
|
100
83
|
- !ruby/object:Gem::Version
|
|
101
|
-
version: '
|
|
84
|
+
version: '3.3'
|
|
102
85
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
103
86
|
requirements:
|
|
104
87
|
- - ">="
|
|
105
88
|
- !ruby/object:Gem::Version
|
|
106
89
|
version: '0'
|
|
107
90
|
requirements: []
|
|
108
|
-
rubygems_version: 3.
|
|
109
|
-
signing_key:
|
|
91
|
+
rubygems_version: 3.6.9
|
|
110
92
|
specification_version: 4
|
|
111
93
|
summary: Detect keyboard mashing and other junk in your data.
|
|
112
94
|
test_files: []
|