scrub_db 2.22 → 2.23

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 92d54aa6daab25078012cec4499481f2800e2f39942923ee903a6157e5218c97
4
- data.tar.gz: 57bb1f734437f6d60ae0fc1264667473b1917d55ced8a09ff623348998e3559c
3
+ metadata.gz: 71f435783cd9717b8a67d76e5866ed6a60ce25582e9df4e96ba8690346730b7e
4
+ data.tar.gz: 84d726d3d4d35ebfa89b71f943aa3c48063fe9fe5ac2bb72fcbf99d420cb8a17
5
5
  SHA512:
6
- metadata.gz: 12a4e4b2aa0ee2ed5bb5d6286333f53bc20723c7e12c38fdc5505a9d6dab155fd21b42364eb89f0b1b300332b2c58f722b77d85d1b363a21c5bac74db839058b
7
- data.tar.gz: d85c36b73d4affa6fe438ba271f117f558d540cfce4e7efa9d51aca2ee548c8e1618fbf2cf4ae45199722c68425122cef344a6aff961cf8cce20034d0085900f
6
+ metadata.gz: 5f032de540a7aa3521f2cb01d66d89339f354d4fedb4d221744b4a046a7f41942296aca6e21fdd16caa0c72c1deb1a7d1d55d610785b584edaac84f0c344ee60
7
+ data.tar.gz: e31b40b6a7f41be06368812e7796125d2f593f9feee5712fe854b4293117367724dd0ebdc5ecdc93a9c9c54cb7b5f7850a3755b84cb14a1764679601b2b50db0
data/Rakefile CHANGED
@@ -17,15 +17,39 @@ task :console do
17
17
  require "active_support/all"
18
18
  ARGV.clear
19
19
 
20
- scrubbed_webs = run_scrub_webs
20
+ # scrubbed_webs = run_scrub_webs
21
21
  # scrubbed_proper_strings = run_scrub_proper_strings
22
22
  # scrubbed_strings = run_scrub_strings
23
- # binding.pry
23
+ scrubbed_hash = run_scrub_string
24
+ scrubbed_hash = run_scrub_proper_string
25
+ binding.pry
24
26
 
25
27
  IRB.start
26
28
  end
27
29
 
28
30
 
31
+ def run_scrub_string
32
+ strings_criteria = {
33
+ pos_criteria: WebsCriteria.seed_pos_urls,
34
+ neg_criteria: WebsCriteria.seed_neg_urls
35
+ }
36
+ string = 'quick auto-approval gmc and bmw-world of AUSTIN tx, INC'
37
+ strings_obj = ScrubDb::Strings.new(strings_criteria)
38
+ scrubbed_hash = strings_obj.scrub_string(string)
39
+ end
40
+
41
+
42
+ def run_scrub_proper_string
43
+ strings_criteria = {
44
+ pos_criteria: WebsCriteria.seed_pos_urls,
45
+ neg_criteria: WebsCriteria.seed_neg_urls
46
+ }
47
+ string = 'quick auto-approval gmc and bmw-world of AUSTIN tx, INC'
48
+ strings_obj = ScrubDb::Strings.new(strings_criteria)
49
+ scrubbed_hash = strings_obj.scrub_proper_string(string)
50
+ end
51
+
52
+
29
53
  def run_scrub_strings
30
54
  strings_criteria = {
31
55
  pos_criteria: WebsCriteria.seed_pos_urls,
@@ -82,11 +106,10 @@ def run_scrub_proper_strings
82
106
  ]
83
107
 
84
108
  strings_obj = ScrubDb::Strings.new(strings_criteria)
85
- scrubbed_proper_strings = strings_obj.scrub_proper_strings(array_of_propers)
109
+ scrubbed_proper_strings = strings_obj.scrub_proper_strings(array_of_strings)
86
110
  end
87
111
 
88
112
 
89
-
90
113
  def run_scrub_webs
91
114
  urls = %w[
92
115
  austinchevrolet.not.real
@@ -1,5 +1,4 @@
1
1
 
2
-
3
2
  module ScrubDb
4
3
  class Strings
5
4
  attr_accessor :filter, :empty_criteria
@@ -7,30 +6,40 @@ module ScrubDb
7
6
  def initialize(criteria={})
8
7
  @empty_criteria = criteria&.empty?
9
8
  @filter = ScrubDb::Filter.new(criteria) unless @empty_criteria
9
+ @crmf = CrmFormatter
10
10
  end
11
11
 
12
12
  def scrub_proper_strings(props=[])
13
- prop_hashes = CrmFormatter.format_propers(props)
14
- prop_hashes = merge_criteria(prop_hashes)
15
- prop_hashes.map! { |prop_hsh| scrub_hash(prop_hsh) }
13
+ prop_hashes = props.map! { |str| scrub_proper_string(str) }
16
14
  end
17
15
 
18
- def scrub_strings(strings=[])
19
- str_hashes = strings_to_hashes(strings)
20
- str_hashes = merge_criteria(str_hashes)
21
- str_hashes.map! { |str_hsh| scrub_hash(str_hsh) }
16
+ def scrub_strings(strs=[])
17
+ str_hashes = strs.map! { |str| scrub_string(str) }
22
18
  end
23
19
 
24
- def strings_to_hashes(strings)
25
- str_hashes = strings.map { |str| { string: str } }
20
+
21
+ def scrub_proper_string(string)
22
+ hsh = @crmf.format_proper(string)
23
+ hsh = merge_criteria(hsh)
24
+ hsh = scrub_hash(hsh)
26
25
  end
27
26
 
28
- def merge_criteria(hashes)
29
- hashes.map do |hsh|
30
- hsh.merge({ pos_criteria: [], neg_criteria: [] })
31
- end
27
+ def scrub_string(string)
28
+ hsh = string_to_hash(string)
29
+ hsh = merge_criteria(hsh)
30
+ hsh = scrub_hash(hsh)
31
+ end
32
+
33
+
34
+ def string_to_hash(string)
35
+ hsh = { string: string, pos_criteria: [], neg_criteria: [] }
36
+ end
37
+
38
+ def merge_criteria(hsh)
39
+ hsh = hsh.merge({ pos_criteria: [], neg_criteria: [] })
32
40
  end
33
41
 
42
+
34
43
  def scrub_hash(hsh)
35
44
  str = hsh[:string]
36
45
  prop = hsh[:proper_f]
@@ -48,5 +57,4 @@ module ScrubDb
48
57
  end
49
58
 
50
59
  end
51
-
52
60
  end
@@ -1,3 +1,3 @@
1
1
  module ScrubDb
2
- VERSION = "2.22"
2
+ VERSION = "2.23"
3
3
  end
data/lib/scrub_db/webs.rb CHANGED
@@ -53,7 +53,7 @@ module ScrubDb
53
53
  def scrub_url_hash(url_hash)
54
54
  url = url_hash[:url_f]
55
55
  path = url_hash[:url_path]
56
- href = url_hash[:href]
56
+ path = url_hash[:path]
57
57
  url_exts = url_hash[:url_exts]
58
58
 
59
59
  url_hash = @filter.scrub_oa(url_hash, url_exts, 'neg_exts', 'equal')
data/lib/webs_criteria.rb CHANGED
@@ -37,11 +37,11 @@ class WebsCriteria
37
37
  %w(com net)
38
38
  end
39
39
 
40
- # def self.seed_neg_hrefs
40
+ # def self.seed_neg_paths
41
41
  # %w(? .com .jpg @ * afri after anounc apply approved blog book business buy call care career cash charit cheap check click collis commerc cont contrib deal distrib download employ event face feature feed financ find fleet form gas generat golf here holiday hospi hour info insta inventory join later light login mail mobile movie museu music news none now oil part pay phone policy priva pump quick quote rate regist review saving schedul service shop sign site speci ticket tire today transla travel truck tv twitter watch youth)
42
42
  # end
43
43
  #
44
- # def self.seed_pos_hrefs
44
+ # def self.seed_pos_paths
45
45
  # %w(team staff management)
46
46
  # end
47
47
 
data/scrub_db.gemspec CHANGED
@@ -37,11 +37,10 @@ Gem::Specification.new do |spec|
37
37
 
38
38
  spec.required_ruby_version = '~> 2.5.1'
39
39
  spec.add_dependency 'activesupport', '~> 5.2'
40
+ spec.add_dependency 'crm_formatter', '~> 2.64'
40
41
  spec.add_dependency 'utf8_sanitizer', '~> 2.16'
41
- spec.add_dependency 'crm_formatter', '~> 2.61'
42
42
 
43
- # spec.add_development_dependency 'activesupport', '~> 5.2'
44
- # spec.add_development_dependency 'utf8_sanitizer', '~> 2.15'
43
+
45
44
  # spec.add_dependency "activesupport-inflector", ['~> 0.1.0']
46
45
  spec.add_development_dependency 'bundler', '~> 1.16', '>= 1.16.2'
47
46
  spec.add_development_dependency 'pry', '~> 0.11.3'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrub_db
3
3
  version: !ruby/object:Gem::Version
4
- version: '2.22'
4
+ version: '2.23'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Booth
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-07-01 00:00:00.000000000 Z
11
+ date: 2018-07-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -25,33 +25,33 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '5.2'
27
27
  - !ruby/object:Gem::Dependency
28
- name: utf8_sanitizer
28
+ name: crm_formatter
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.16'
33
+ version: '2.64'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.16'
40
+ version: '2.64'
41
41
  - !ruby/object:Gem::Dependency
42
- name: crm_formatter
42
+ name: utf8_sanitizer
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '2.61'
47
+ version: '2.16'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '2.61'
54
+ version: '2.16'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: bundler
57
57
  requirement: !ruby/object:Gem::Requirement