scrub_db 2.1 → 2.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec_status +2 -2
- data/README.md +14 -5
- data/Rakefile +6 -6
- data/lib/scrub_db.rb +2 -5
- data/lib/scrub_db/filter.rb +1 -0
- data/lib/scrub_db/strings.rb +1 -1
- data/lib/scrub_db/version.rb +1 -1
- data/lib/scrub_db/webs.rb +1 -1
- data/scrub_db.gemspec +32 -12
- metadata +8 -96
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2038504bcd3f2462cda381de0f5a89beadc289936668706778275ce36b0555cf
|
4
|
+
data.tar.gz: 8c984148871d933f7793349594b5d183cfbae982edbee161acd445599693ad3b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a02e37de20a4ff5d1ee06c949c21837f2053b36e8a9e86b199caccef532ef5880dace5ea897730ba1cd2816f534217118410756dd3221762e1bc93442ce96ea
|
7
|
+
data.tar.gz: '08d7e3fb645dd2a6b0abff5baa47c0e87bb4fb293618d88daab3fa9fb18c991b56481a774243f28b21b85b5e735862fadb4c36299a2dedc1bfae20dc83327174'
|
data/.rspec_status
CHANGED
@@ -1,4 +1,4 @@
|
|
1
1
|
example_id | status | run_time |
|
2
2
|
---------------------------- | ------ | --------------- |
|
3
|
-
./spec/scrub_db_spec.rb[1:1] | passed | 0.
|
4
|
-
./spec/scrub_db_spec.rb[1:2] | failed | 0.
|
3
|
+
./spec/scrub_db_spec.rb[1:1] | passed | 0.00098 seconds |
|
4
|
+
./spec/scrub_db_spec.rb[1:2] | failed | 0.0153 seconds |
|
data/README.md
CHANGED
@@ -1,4 +1,9 @@
|
|
1
1
|
# ScrubDb
|
2
|
+
|
3
|
+
[Build Status](https://travis-ci.org/4rlm/scrub_db)
|
4
|
+
[Gem Version](https://badge.fury.io/rb/scrub_db)
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
6
|
+
|
2
7
|
#### Scrub your database, API data, web scraping data, and web form submissions based on your custom criteria. Allows for different criteria for different jobs. Returns detailed reporting to zero in on your data with ease, efficiency, and greater insight. Includes an option to pre-format data before scrubbing, which also normalizes and standardizes your data sets (e.g., uniform URL patterns).
|
3
8
|
|
4
9
|
## Installation
|
@@ -35,13 +40,17 @@ strings_obj = ScrubDb::Strings.new(strings_criteria)
|
|
35
40
|
|
36
41
|
##### 2. For Web Criteria
|
37
42
|
```
|
38
|
-
|
39
|
-
|
40
|
-
|
43
|
+
web_criteria = {
|
44
|
+
neg_urls: %w[aprov avis budget collis eat],
|
45
|
+
pos_urls: %w[acura audi bmw bentley],
|
46
|
+
neg_paths: %w[buy bye call cash cheap click collis cont distrib],
|
47
|
+
pos_paths: %w[team staff management],
|
48
|
+
neg_exts: %w[au ca edu es gov in ru uk us],
|
49
|
+
pos_exts: %w[com net]
|
41
50
|
}
|
42
|
-
webs_obj = ScrubDb::Webs.new(webs_criteria)
|
43
|
-
```
|
44
51
|
|
52
|
+
scrub_web_obj = ScrubDb::Webs.new(web_criteria)
|
53
|
+
```
|
45
54
|
|
46
55
|
|
47
56
|
#### Step 2: Load Your Data to Scrub:
|
data/Rakefile
CHANGED
@@ -18,8 +18,8 @@ task :console do
|
|
18
18
|
ARGV.clear
|
19
19
|
|
20
20
|
scrubbed_webs = run_scrub_webs
|
21
|
-
# scrubbed_strings = run_scrub_strings
|
22
21
|
# scrubbed_proper_strings = run_scrub_proper_strings
|
22
|
+
# scrubbed_strings = run_scrub_strings
|
23
23
|
# binding.pry
|
24
24
|
|
25
25
|
IRB.start
|
@@ -33,8 +33,8 @@ def run_scrub_strings
|
|
33
33
|
}
|
34
34
|
|
35
35
|
array_of_strings = [
|
36
|
-
'quick auto
|
37
|
-
'
|
36
|
+
'quick auto-approval gmc and bmw-world of AUSTIN tx, INC',
|
37
|
+
'quick auto-approval, inc',
|
38
38
|
'DOWNTOWN CAR REPAIR, INC',
|
39
39
|
'TEXAS TRAVEL, CO',
|
40
40
|
'123 Car-world Kia OF CHICAGO IL',
|
@@ -62,9 +62,9 @@ def run_scrub_proper_strings
|
|
62
62
|
neg_criteria: WebsCriteria.seed_neg_urls
|
63
63
|
}
|
64
64
|
|
65
|
-
|
66
|
-
'quick auto
|
67
|
-
'
|
65
|
+
array_of_strings = [
|
66
|
+
'quick auto-approval gmc and bmw-world of AUSTIN tx, INC',
|
67
|
+
'quick auto-approval, inc',
|
68
68
|
'DOWNTOWN CAR REPAIR, INC',
|
69
69
|
'TEXAS TRAVEL, CO',
|
70
70
|
'123 Car-world Kia OF CHICAGO IL',
|
data/lib/scrub_db.rb
CHANGED
@@ -2,13 +2,10 @@ require "scrub_db/version"
|
|
2
2
|
require 'scrub_db/webs'
|
3
3
|
require 'scrub_db/strings'
|
4
4
|
require 'scrub_db/filter'
|
5
|
-
|
5
|
+
|
6
6
|
require 'crm_formatter'
|
7
|
+
# require 'pry'
|
7
8
|
|
8
9
|
module ScrubDb
|
9
10
|
|
10
|
-
def self.welcome
|
11
|
-
puts "Welcome to the gem!"
|
12
|
-
end
|
13
|
-
|
14
11
|
end
|
data/lib/scrub_db/filter.rb
CHANGED
data/lib/scrub_db/strings.rb
CHANGED
data/lib/scrub_db/version.rb
CHANGED
data/lib/scrub_db/webs.rb
CHANGED
data/scrub_db.gemspec
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
|
2
2
|
lib = File.expand_path("../lib", __FILE__)
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require 'scrub_db'
|
5
4
|
require "scrub_db/version"
|
6
5
|
|
7
6
|
Gem::Specification.new do |spec|
|
@@ -35,22 +34,43 @@ Gem::Specification.new do |spec|
|
|
35
34
|
spec.bindir = "exe"
|
36
35
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
37
36
|
spec.require_paths = ["lib"]
|
38
|
-
spec.post_install_message = 'Thanks for installing scrub_db!'
|
39
37
|
|
40
38
|
spec.required_ruby_version = '~> 2.5.1'
|
41
|
-
spec.add_dependency 'activesupport', '~> 5.2'
|
42
|
-
|
43
|
-
|
44
|
-
spec.add_dependency "utf8_sanitizer", "~> 2.0"
|
45
|
-
spec.add_dependency "crm_formatter", "~> 2.6"
|
39
|
+
spec.add_dependency 'activesupport', '~> 5.2'
|
40
|
+
spec.add_dependency 'utf8_sanitizer', '~> 2.16'
|
41
|
+
spec.add_development_dependency 'crm_formatter', '~> 2.61'
|
46
42
|
|
43
|
+
# spec.add_development_dependency 'activesupport', '~> 5.2'
|
44
|
+
# spec.add_development_dependency 'utf8_sanitizer', '~> 2.15'
|
45
|
+
# spec.add_dependency "activesupport-inflector", ['~> 0.1.0']
|
47
46
|
spec.add_development_dependency 'bundler', '~> 1.16', '>= 1.16.2'
|
48
|
-
spec.add_development_dependency 'byebug', '~> 10.0', '>= 10.0.2'
|
49
|
-
spec.add_development_dependency 'class_indexer', '~> 0.3.0'
|
50
|
-
spec.add_development_dependency 'irbtools', '~> 2.2', '>= 2.2.1'
|
51
47
|
spec.add_development_dependency 'pry', '~> 0.11.3'
|
52
48
|
spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.1'
|
53
49
|
spec.add_development_dependency 'rspec', '~> 3.7'
|
54
|
-
spec.add_development_dependency '
|
55
|
-
spec.add_development_dependency '
|
50
|
+
# spec.add_development_dependency 'byebug', '~> 10.0', '>= 10.0.2'
|
51
|
+
# spec.add_development_dependency 'class_indexer', '~> 0.3.0'
|
52
|
+
# spec.add_development_dependency 'irbtools', '~> 2.2', '>= 2.2.1'
|
53
|
+
# spec.add_development_dependency 'rubocop', '~> 0.56.0'
|
54
|
+
# spec.add_development_dependency 'ruby-beautify', '~> 0.97.4'
|
55
|
+
# spec.add_runtime_dependency 'library', '~> 2.2'
|
56
|
+
# spec.add_dependency 'activerecord', '>= 3.0'
|
57
|
+
# spec.add_dependency 'actionpack', '>= 3.0'
|
58
|
+
# spec.add_dependency 'polyamorous', '~> 1.3.2'
|
59
|
+
# spec.add_development_dependency 'machinist', '~> 1.0.6'
|
60
|
+
# spec.add_development_dependency 'faker', '~> 0.9.5'
|
61
|
+
# spec.add_development_dependency 'sqlite3', '~> 1.3.3'
|
62
|
+
# spec.add_development_dependency 'pg', '~> 0.21'
|
63
|
+
# spec.add_development_dependency 'mysql2', '0.3.20'
|
64
|
+
|
65
|
+
# spec.requirements << 'libmagick, v6.0'
|
66
|
+
# spec.requirements << 'A good graphics card'
|
67
|
+
# # This gem will work with 1.8.6 or greater...
|
68
|
+
# spec.required_ruby_version = '>= 1.8.6'
|
69
|
+
#
|
70
|
+
# # Only with ruby 2.0.x
|
71
|
+
# spec.required_ruby_version = '~> 2.0'
|
72
|
+
#
|
73
|
+
# # Only with ruby between 2.2.0 and 2.2.2
|
74
|
+
# spec.required_ruby_version = ['>= 2.2.0', '< 2.2.3']
|
75
|
+
|
56
76
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrub_db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '2.
|
4
|
+
version: '2.21'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Booth
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -17,9 +17,6 @@ dependencies:
|
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '5.2'
|
20
|
-
- - ">="
|
21
|
-
- !ruby/object:Gem::Version
|
22
|
-
version: 5.2.0
|
23
20
|
type: :runtime
|
24
21
|
prerelease: false
|
25
22
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -27,37 +24,34 @@ dependencies:
|
|
27
24
|
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '5.2'
|
30
|
-
- - ">="
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: 5.2.0
|
33
27
|
- !ruby/object:Gem::Dependency
|
34
28
|
name: utf8_sanitizer
|
35
29
|
requirement: !ruby/object:Gem::Requirement
|
36
30
|
requirements:
|
37
31
|
- - "~>"
|
38
32
|
- !ruby/object:Gem::Version
|
39
|
-
version: '2.
|
33
|
+
version: '2.16'
|
40
34
|
type: :runtime
|
41
35
|
prerelease: false
|
42
36
|
version_requirements: !ruby/object:Gem::Requirement
|
43
37
|
requirements:
|
44
38
|
- - "~>"
|
45
39
|
- !ruby/object:Gem::Version
|
46
|
-
version: '2.
|
40
|
+
version: '2.16'
|
47
41
|
- !ruby/object:Gem::Dependency
|
48
42
|
name: crm_formatter
|
49
43
|
requirement: !ruby/object:Gem::Requirement
|
50
44
|
requirements:
|
51
45
|
- - "~>"
|
52
46
|
- !ruby/object:Gem::Version
|
53
|
-
version: '2.
|
54
|
-
type: :
|
47
|
+
version: '2.61'
|
48
|
+
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
51
|
requirements:
|
58
52
|
- - "~>"
|
59
53
|
- !ruby/object:Gem::Version
|
60
|
-
version: '2.
|
54
|
+
version: '2.61'
|
61
55
|
- !ruby/object:Gem::Dependency
|
62
56
|
name: bundler
|
63
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -78,60 +72,6 @@ dependencies:
|
|
78
72
|
- - ">="
|
79
73
|
- !ruby/object:Gem::Version
|
80
74
|
version: 1.16.2
|
81
|
-
- !ruby/object:Gem::Dependency
|
82
|
-
name: byebug
|
83
|
-
requirement: !ruby/object:Gem::Requirement
|
84
|
-
requirements:
|
85
|
-
- - "~>"
|
86
|
-
- !ruby/object:Gem::Version
|
87
|
-
version: '10.0'
|
88
|
-
- - ">="
|
89
|
-
- !ruby/object:Gem::Version
|
90
|
-
version: 10.0.2
|
91
|
-
type: :development
|
92
|
-
prerelease: false
|
93
|
-
version_requirements: !ruby/object:Gem::Requirement
|
94
|
-
requirements:
|
95
|
-
- - "~>"
|
96
|
-
- !ruby/object:Gem::Version
|
97
|
-
version: '10.0'
|
98
|
-
- - ">="
|
99
|
-
- !ruby/object:Gem::Version
|
100
|
-
version: 10.0.2
|
101
|
-
- !ruby/object:Gem::Dependency
|
102
|
-
name: class_indexer
|
103
|
-
requirement: !ruby/object:Gem::Requirement
|
104
|
-
requirements:
|
105
|
-
- - "~>"
|
106
|
-
- !ruby/object:Gem::Version
|
107
|
-
version: 0.3.0
|
108
|
-
type: :development
|
109
|
-
prerelease: false
|
110
|
-
version_requirements: !ruby/object:Gem::Requirement
|
111
|
-
requirements:
|
112
|
-
- - "~>"
|
113
|
-
- !ruby/object:Gem::Version
|
114
|
-
version: 0.3.0
|
115
|
-
- !ruby/object:Gem::Dependency
|
116
|
-
name: irbtools
|
117
|
-
requirement: !ruby/object:Gem::Requirement
|
118
|
-
requirements:
|
119
|
-
- - "~>"
|
120
|
-
- !ruby/object:Gem::Version
|
121
|
-
version: '2.2'
|
122
|
-
- - ">="
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: 2.2.1
|
125
|
-
type: :development
|
126
|
-
prerelease: false
|
127
|
-
version_requirements: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - "~>"
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: '2.2'
|
132
|
-
- - ">="
|
133
|
-
- !ruby/object:Gem::Version
|
134
|
-
version: 2.2.1
|
135
75
|
- !ruby/object:Gem::Dependency
|
136
76
|
name: pry
|
137
77
|
requirement: !ruby/object:Gem::Requirement
|
@@ -180,34 +120,6 @@ dependencies:
|
|
180
120
|
- - "~>"
|
181
121
|
- !ruby/object:Gem::Version
|
182
122
|
version: '3.7'
|
183
|
-
- !ruby/object:Gem::Dependency
|
184
|
-
name: rubocop
|
185
|
-
requirement: !ruby/object:Gem::Requirement
|
186
|
-
requirements:
|
187
|
-
- - "~>"
|
188
|
-
- !ruby/object:Gem::Version
|
189
|
-
version: 0.56.0
|
190
|
-
type: :development
|
191
|
-
prerelease: false
|
192
|
-
version_requirements: !ruby/object:Gem::Requirement
|
193
|
-
requirements:
|
194
|
-
- - "~>"
|
195
|
-
- !ruby/object:Gem::Version
|
196
|
-
version: 0.56.0
|
197
|
-
- !ruby/object:Gem::Dependency
|
198
|
-
name: ruby-beautify
|
199
|
-
requirement: !ruby/object:Gem::Requirement
|
200
|
-
requirements:
|
201
|
-
- - "~>"
|
202
|
-
- !ruby/object:Gem::Version
|
203
|
-
version: 0.97.4
|
204
|
-
type: :development
|
205
|
-
prerelease: false
|
206
|
-
version_requirements: !ruby/object:Gem::Requirement
|
207
|
-
requirements:
|
208
|
-
- - "~>"
|
209
|
-
- !ruby/object:Gem::Version
|
210
|
-
version: 0.97.4
|
211
123
|
description: Scrub your database, api data, web scraping data, and web form submissions
|
212
124
|
based on your custom criteria. Allows for different criteria for different jobs. Returns
|
213
125
|
detailed reporting to zero-in on your data with ease, efficiency, and greater insight. Allows
|
@@ -243,7 +155,7 @@ licenses:
|
|
243
155
|
- MIT
|
244
156
|
metadata:
|
245
157
|
allowed_push_host: https://rubygems.org
|
246
|
-
post_install_message:
|
158
|
+
post_install_message:
|
247
159
|
rdoc_options: []
|
248
160
|
require_paths:
|
249
161
|
- lib
|