scrub_db 2.1 → 2.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec_status +2 -2
- data/README.md +14 -5
- data/Rakefile +6 -6
- data/lib/scrub_db.rb +2 -5
- data/lib/scrub_db/filter.rb +1 -0
- data/lib/scrub_db/strings.rb +1 -1
- data/lib/scrub_db/version.rb +1 -1
- data/lib/scrub_db/webs.rb +1 -1
- data/scrub_db.gemspec +32 -12
- metadata +8 -96
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2038504bcd3f2462cda381de0f5a89beadc289936668706778275ce36b0555cf
|
4
|
+
data.tar.gz: 8c984148871d933f7793349594b5d183cfbae982edbee161acd445599693ad3b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a02e37de20a4ff5d1ee06c949c21837f2053b36e8a9e86b199caccef532ef5880dace5ea897730ba1cd2816f534217118410756dd3221762e1bc93442ce96ea
|
7
|
+
data.tar.gz: '08d7e3fb645dd2a6b0abff5baa47c0e87bb4fb293618d88daab3fa9fb18c991b56481a774243f28b21b85b5e735862fadb4c36299a2dedc1bfae20dc83327174'
|
data/.rspec_status
CHANGED
@@ -1,4 +1,4 @@
|
|
1
1
|
example_id | status | run_time |
|
2
2
|
---------------------------- | ------ | --------------- |
|
3
|
-
./spec/scrub_db_spec.rb[1:1] | passed | 0.
|
4
|
-
./spec/scrub_db_spec.rb[1:2] | failed | 0.
|
3
|
+
./spec/scrub_db_spec.rb[1:1] | passed | 0.00098 seconds |
|
4
|
+
./spec/scrub_db_spec.rb[1:2] | failed | 0.0153 seconds |
|
data/README.md
CHANGED
@@ -1,4 +1,9 @@
|
|
1
1
|
# ScrubDb
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.org/4rlm/scrub_db.svg?branch=master)](https://travis-ci.org/4rlm/scrub_db)
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/scrub_db.svg)](https://badge.fury.io/rb/scrub_db)
|
5
|
+
[![MIT License](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
6
|
+
|
2
7
|
#### Scrub your database, API data, web-scraping data, and web form submissions based on your custom criteria. Allows for different criteria for different jobs. Returns detailed reporting so you can zero in on your data with ease, efficiency, and greater insight. Also offers the option to pre-format data before scrubbing, to normalize and standardize your data sets (e.g., uniform URL patterns).
|
3
8
|
|
4
9
|
## Installation
|
@@ -35,13 +40,17 @@ strings_obj = ScrubDb::Strings.new(strings_criteria)
|
|
35
40
|
|
36
41
|
##### 2. For Web Criteria
|
37
42
|
```
|
38
|
-
|
39
|
-
|
40
|
-
|
43
|
+
web_criteria = {
|
44
|
+
neg_urls: %w[aprov avis budget collis eat],
|
45
|
+
pos_urls: %w[acura audi bmw bentley],
|
46
|
+
neg_paths: %w[buy bye call cash cheap click collis cont distrib],
|
47
|
+
pos_paths: %w[team staff management],
|
48
|
+
neg_exts: %w[au ca edu es gov in ru uk us],
|
49
|
+
pos_exts: %w[com net]
|
41
50
|
}
|
42
|
-
webs_obj = ScrubDb::Webs.new(webs_criteria)
|
43
|
-
```
|
44
51
|
|
52
|
+
scrub_web_obj = ScrubDb::Webs.new(web_criteria)
|
53
|
+
```
|
45
54
|
|
46
55
|
|
47
56
|
#### Step 2: Load Your Data to Scrub:
|
data/Rakefile
CHANGED
@@ -18,8 +18,8 @@ task :console do
|
|
18
18
|
ARGV.clear
|
19
19
|
|
20
20
|
scrubbed_webs = run_scrub_webs
|
21
|
-
# scrubbed_strings = run_scrub_strings
|
22
21
|
# scrubbed_proper_strings = run_scrub_proper_strings
|
22
|
+
# scrubbed_strings = run_scrub_strings
|
23
23
|
# binding.pry
|
24
24
|
|
25
25
|
IRB.start
|
@@ -33,8 +33,8 @@ def run_scrub_strings
|
|
33
33
|
}
|
34
34
|
|
35
35
|
array_of_strings = [
|
36
|
-
'quick auto
|
37
|
-
'
|
36
|
+
'quick auto-approval gmc and bmw-world of AUSTIN tx, INC',
|
37
|
+
'quick auto-approval, inc',
|
38
38
|
'DOWNTOWN CAR REPAIR, INC',
|
39
39
|
'TEXAS TRAVEL, CO',
|
40
40
|
'123 Car-world Kia OF CHICAGO IL',
|
@@ -62,9 +62,9 @@ def run_scrub_proper_strings
|
|
62
62
|
neg_criteria: WebsCriteria.seed_neg_urls
|
63
63
|
}
|
64
64
|
|
65
|
-
|
66
|
-
'quick auto
|
67
|
-
'
|
65
|
+
array_of_strings = [
|
66
|
+
'quick auto-approval gmc and bmw-world of AUSTIN tx, INC',
|
67
|
+
'quick auto-approval, inc',
|
68
68
|
'DOWNTOWN CAR REPAIR, INC',
|
69
69
|
'TEXAS TRAVEL, CO',
|
70
70
|
'123 Car-world Kia OF CHICAGO IL',
|
data/lib/scrub_db.rb
CHANGED
@@ -2,13 +2,10 @@ require "scrub_db/version"
|
|
2
2
|
require 'scrub_db/webs'
|
3
3
|
require 'scrub_db/strings'
|
4
4
|
require 'scrub_db/filter'
|
5
|
-
|
5
|
+
|
6
6
|
require 'crm_formatter'
|
7
|
+
# require 'pry'
|
7
8
|
|
8
9
|
module ScrubDb
|
9
10
|
|
10
|
-
def self.welcome
|
11
|
-
puts "Welcome to the gem!"
|
12
|
-
end
|
13
|
-
|
14
11
|
end
|
data/lib/scrub_db/filter.rb
CHANGED
data/lib/scrub_db/strings.rb
CHANGED
data/lib/scrub_db/version.rb
CHANGED
data/lib/scrub_db/webs.rb
CHANGED
data/scrub_db.gemspec
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
|
2
2
|
lib = File.expand_path("../lib", __FILE__)
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require 'scrub_db'
|
5
4
|
require "scrub_db/version"
|
6
5
|
|
7
6
|
Gem::Specification.new do |spec|
|
@@ -35,22 +34,43 @@ Gem::Specification.new do |spec|
|
|
35
34
|
spec.bindir = "exe"
|
36
35
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
37
36
|
spec.require_paths = ["lib"]
|
38
|
-
spec.post_install_message = 'Thanks for installing scrub_db!'
|
39
37
|
|
40
38
|
spec.required_ruby_version = '~> 2.5.1'
|
41
|
-
spec.add_dependency 'activesupport', '~> 5.2'
|
42
|
-
|
43
|
-
|
44
|
-
spec.add_dependency "utf8_sanitizer", "~> 2.0"
|
45
|
-
spec.add_dependency "crm_formatter", "~> 2.6"
|
39
|
+
spec.add_dependency 'activesupport', '~> 5.2'
|
40
|
+
spec.add_dependency 'utf8_sanitizer', '~> 2.16'
|
41
|
+
spec.add_development_dependency 'crm_formatter', '~> 2.61'
|
46
42
|
|
43
|
+
# spec.add_development_dependency 'activesupport', '~> 5.2'
|
44
|
+
# spec.add_development_dependency 'utf8_sanitizer', '~> 2.15'
|
45
|
+
# spec.add_dependency "activesupport-inflector", ['~> 0.1.0']
|
47
46
|
spec.add_development_dependency 'bundler', '~> 1.16', '>= 1.16.2'
|
48
|
-
spec.add_development_dependency 'byebug', '~> 10.0', '>= 10.0.2'
|
49
|
-
spec.add_development_dependency 'class_indexer', '~> 0.3.0'
|
50
|
-
spec.add_development_dependency 'irbtools', '~> 2.2', '>= 2.2.1'
|
51
47
|
spec.add_development_dependency 'pry', '~> 0.11.3'
|
52
48
|
spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.1'
|
53
49
|
spec.add_development_dependency 'rspec', '~> 3.7'
|
54
|
-
spec.add_development_dependency '
|
55
|
-
spec.add_development_dependency '
|
50
|
+
# spec.add_development_dependency 'byebug', '~> 10.0', '>= 10.0.2'
|
51
|
+
# spec.add_development_dependency 'class_indexer', '~> 0.3.0'
|
52
|
+
# spec.add_development_dependency 'irbtools', '~> 2.2', '>= 2.2.1'
|
53
|
+
# spec.add_development_dependency 'rubocop', '~> 0.56.0'
|
54
|
+
# spec.add_development_dependency 'ruby-beautify', '~> 0.97.4'
|
55
|
+
# spec.add_runtime_dependency 'library', '~> 2.2'
|
56
|
+
# spec.add_dependency 'activerecord', '>= 3.0'
|
57
|
+
# spec.add_dependency 'actionpack', '>= 3.0'
|
58
|
+
# spec.add_dependency 'polyamorous', '~> 1.3.2'
|
59
|
+
# spec.add_development_dependency 'machinist', '~> 1.0.6'
|
60
|
+
# spec.add_development_dependency 'faker', '~> 0.9.5'
|
61
|
+
# spec.add_development_dependency 'sqlite3', '~> 1.3.3'
|
62
|
+
# spec.add_development_dependency 'pg', '~> 0.21'
|
63
|
+
# spec.add_development_dependency 'mysql2', '0.3.20'
|
64
|
+
|
65
|
+
# spec.requirements << 'libmagick, v6.0'
|
66
|
+
# spec.requirements << 'A good graphics card'
|
67
|
+
# # This gem will work with 1.8.6 or greater...
|
68
|
+
# spec.required_ruby_version = '>= 1.8.6'
|
69
|
+
#
|
70
|
+
# # Only with ruby 2.0.x
|
71
|
+
# spec.required_ruby_version = '~> 2.0'
|
72
|
+
#
|
73
|
+
# # Only with ruby between 2.2.0 and 2.2.2
|
74
|
+
# spec.required_ruby_version = ['>= 2.2.0', '< 2.2.3']
|
75
|
+
|
56
76
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrub_db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '2.
|
4
|
+
version: '2.21'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Booth
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -17,9 +17,6 @@ dependencies:
|
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '5.2'
|
20
|
-
- - ">="
|
21
|
-
- !ruby/object:Gem::Version
|
22
|
-
version: 5.2.0
|
23
20
|
type: :runtime
|
24
21
|
prerelease: false
|
25
22
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -27,37 +24,34 @@ dependencies:
|
|
27
24
|
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '5.2'
|
30
|
-
- - ">="
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: 5.2.0
|
33
27
|
- !ruby/object:Gem::Dependency
|
34
28
|
name: utf8_sanitizer
|
35
29
|
requirement: !ruby/object:Gem::Requirement
|
36
30
|
requirements:
|
37
31
|
- - "~>"
|
38
32
|
- !ruby/object:Gem::Version
|
39
|
-
version: '2.
|
33
|
+
version: '2.16'
|
40
34
|
type: :runtime
|
41
35
|
prerelease: false
|
42
36
|
version_requirements: !ruby/object:Gem::Requirement
|
43
37
|
requirements:
|
44
38
|
- - "~>"
|
45
39
|
- !ruby/object:Gem::Version
|
46
|
-
version: '2.
|
40
|
+
version: '2.16'
|
47
41
|
- !ruby/object:Gem::Dependency
|
48
42
|
name: crm_formatter
|
49
43
|
requirement: !ruby/object:Gem::Requirement
|
50
44
|
requirements:
|
51
45
|
- - "~>"
|
52
46
|
- !ruby/object:Gem::Version
|
53
|
-
version: '2.
|
54
|
-
type: :
|
47
|
+
version: '2.61'
|
48
|
+
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
51
|
requirements:
|
58
52
|
- - "~>"
|
59
53
|
- !ruby/object:Gem::Version
|
60
|
-
version: '2.
|
54
|
+
version: '2.61'
|
61
55
|
- !ruby/object:Gem::Dependency
|
62
56
|
name: bundler
|
63
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -78,60 +72,6 @@ dependencies:
|
|
78
72
|
- - ">="
|
79
73
|
- !ruby/object:Gem::Version
|
80
74
|
version: 1.16.2
|
81
|
-
- !ruby/object:Gem::Dependency
|
82
|
-
name: byebug
|
83
|
-
requirement: !ruby/object:Gem::Requirement
|
84
|
-
requirements:
|
85
|
-
- - "~>"
|
86
|
-
- !ruby/object:Gem::Version
|
87
|
-
version: '10.0'
|
88
|
-
- - ">="
|
89
|
-
- !ruby/object:Gem::Version
|
90
|
-
version: 10.0.2
|
91
|
-
type: :development
|
92
|
-
prerelease: false
|
93
|
-
version_requirements: !ruby/object:Gem::Requirement
|
94
|
-
requirements:
|
95
|
-
- - "~>"
|
96
|
-
- !ruby/object:Gem::Version
|
97
|
-
version: '10.0'
|
98
|
-
- - ">="
|
99
|
-
- !ruby/object:Gem::Version
|
100
|
-
version: 10.0.2
|
101
|
-
- !ruby/object:Gem::Dependency
|
102
|
-
name: class_indexer
|
103
|
-
requirement: !ruby/object:Gem::Requirement
|
104
|
-
requirements:
|
105
|
-
- - "~>"
|
106
|
-
- !ruby/object:Gem::Version
|
107
|
-
version: 0.3.0
|
108
|
-
type: :development
|
109
|
-
prerelease: false
|
110
|
-
version_requirements: !ruby/object:Gem::Requirement
|
111
|
-
requirements:
|
112
|
-
- - "~>"
|
113
|
-
- !ruby/object:Gem::Version
|
114
|
-
version: 0.3.0
|
115
|
-
- !ruby/object:Gem::Dependency
|
116
|
-
name: irbtools
|
117
|
-
requirement: !ruby/object:Gem::Requirement
|
118
|
-
requirements:
|
119
|
-
- - "~>"
|
120
|
-
- !ruby/object:Gem::Version
|
121
|
-
version: '2.2'
|
122
|
-
- - ">="
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: 2.2.1
|
125
|
-
type: :development
|
126
|
-
prerelease: false
|
127
|
-
version_requirements: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - "~>"
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: '2.2'
|
132
|
-
- - ">="
|
133
|
-
- !ruby/object:Gem::Version
|
134
|
-
version: 2.2.1
|
135
75
|
- !ruby/object:Gem::Dependency
|
136
76
|
name: pry
|
137
77
|
requirement: !ruby/object:Gem::Requirement
|
@@ -180,34 +120,6 @@ dependencies:
|
|
180
120
|
- - "~>"
|
181
121
|
- !ruby/object:Gem::Version
|
182
122
|
version: '3.7'
|
183
|
-
- !ruby/object:Gem::Dependency
|
184
|
-
name: rubocop
|
185
|
-
requirement: !ruby/object:Gem::Requirement
|
186
|
-
requirements:
|
187
|
-
- - "~>"
|
188
|
-
- !ruby/object:Gem::Version
|
189
|
-
version: 0.56.0
|
190
|
-
type: :development
|
191
|
-
prerelease: false
|
192
|
-
version_requirements: !ruby/object:Gem::Requirement
|
193
|
-
requirements:
|
194
|
-
- - "~>"
|
195
|
-
- !ruby/object:Gem::Version
|
196
|
-
version: 0.56.0
|
197
|
-
- !ruby/object:Gem::Dependency
|
198
|
-
name: ruby-beautify
|
199
|
-
requirement: !ruby/object:Gem::Requirement
|
200
|
-
requirements:
|
201
|
-
- - "~>"
|
202
|
-
- !ruby/object:Gem::Version
|
203
|
-
version: 0.97.4
|
204
|
-
type: :development
|
205
|
-
prerelease: false
|
206
|
-
version_requirements: !ruby/object:Gem::Requirement
|
207
|
-
requirements:
|
208
|
-
- - "~>"
|
209
|
-
- !ruby/object:Gem::Version
|
210
|
-
version: 0.97.4
|
211
123
|
description: Scrub your database, api data, web scraping data, and web form submissions
|
212
124
|
based on your custom criteria. Allows for different criteria for different jobs. Returns
|
213
125
|
detailed reporting to zero-in on your data with ease, efficiency, and greater insight. Allows
|
@@ -243,7 +155,7 @@ licenses:
|
|
243
155
|
- MIT
|
244
156
|
metadata:
|
245
157
|
allowed_push_host: https://rubygems.org
|
246
|
-
post_install_message:
|
158
|
+
post_install_message:
|
247
159
|
rdoc_options: []
|
248
160
|
require_paths:
|
249
161
|
- lib
|