crm_formatter 1.0.7.pre.rc.1 → 2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -2
- data/.rspec_status +7 -0
- data/.rubocop.yml +10 -0
- data/.rubocop_todo.yml +188 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +21 -0
- data/README.md +202 -145
- data/Rakefile +75 -5
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/crm_formatter.gemspec +59 -13
- data/lib/crm_formatter.rb +46 -3
- data/lib/crm_formatter/address.rb +143 -122
- data/lib/crm_formatter/{extensions.csv → csv/extensions.csv} +0 -0
- data/lib/crm_formatter/csv/seed.csv +2 -0
- data/lib/crm_formatter/phone.rb +28 -20
- data/lib/crm_formatter/version.rb +4 -2
- data/lib/crm_formatter/web.rb +115 -248
- data/lib/crm_formatter/wrap.rb +54 -0
- data/menu.rb +3 -1
- data/non_utf8_examples.txt +40 -0
- data/result.txt +964 -0
- data/rubocop.json +1 -0
- metadata +211 -16
- data/bin/crm_formatter +0 -4
- data/gem_notes_crm_formatter.txt +0 -138
- data/lib/crm_formatter/helpers.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 96ed8a01bb47d8aac9c3bb7b95a6e8d261ecc107d6d47c8813088157978239c4
|
4
|
+
data.tar.gz: 88125eff101ca1ab5e5fcec3015d8cdb3f5f45571e644cfd54547ab8259f268c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 27ba92aa172d3de3813b6338ac77281f840ce7d158a29b54ffa329edf6eac4f2c6394f3608522ec78bb7fe8efaeb9fcd2f034b47e6676c2f275a61e3c39aa2b9
|
7
|
+
data.tar.gz: 7b497e726c925b6cf402c3efb20c0baae87adad9bfd5e18d4a024915bc2127533356aa331804282e9325e4b0055db55eb911aa5cb14ec85eb68870d2a78631e5
|
data/.gitignore
CHANGED
data/.rspec_status
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
example_id | status | run_time |
|
2
|
+
--------------------------------- | ------- | --------------- |
|
3
|
+
./spec/crm_formatter_spec.rb[1:1] | passed | 0.00103 seconds |
|
4
|
+
./spec/crm_formatter_spec.rb[1:2] | failed | 0.0207 seconds |
|
5
|
+
./spec/crm_formatter_spec.rb[1:3] | passed | 0.00086 seconds |
|
6
|
+
./spec/crm_formatter_spec.rb[1:4] | passed | 0.00009 seconds |
|
7
|
+
./spec/crm_formatter_spec.rb[1:5] | unknown | |
|
data/.rubocop.yml
ADDED
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,188 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2018-05-22 12:28:19 -0500 using RuboCop version 0.56.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
# Configuration parameters: Include.
|
11
|
+
# Include: **/*.gemspec
|
12
|
+
Gemspec/RequiredRubyVersion:
|
13
|
+
Exclude:
|
14
|
+
- 'crm_formatter.gemspec'
|
15
|
+
|
16
|
+
# Offense count: 2
|
17
|
+
# Cop supports --auto-correct.
|
18
|
+
Layout/CommentIndentation:
|
19
|
+
Exclude:
|
20
|
+
- 'Rakefile'
|
21
|
+
|
22
|
+
# Offense count: 3
|
23
|
+
# Cop supports --auto-correct.
|
24
|
+
# Configuration parameters: AllowAdjacentOneLineDefs, NumberOfEmptyLines.
|
25
|
+
Layout/EmptyLineBetweenDefs:
|
26
|
+
Exclude:
|
27
|
+
- 'lib/crm_formatter/wrap.rb'
|
28
|
+
|
29
|
+
# Offense count: 25
|
30
|
+
# Cop supports --auto-correct.
|
31
|
+
Layout/EmptyLines:
|
32
|
+
Exclude:
|
33
|
+
- 'Rakefile'
|
34
|
+
- 'lib/crm_formatter.rb'
|
35
|
+
- 'lib/crm_formatter/wrap.rb'
|
36
|
+
|
37
|
+
# Offense count: 1
|
38
|
+
# Cop supports --auto-correct.
|
39
|
+
# Configuration parameters: EnforcedStyle.
|
40
|
+
# SupportedStyles: empty_lines, no_empty_lines
|
41
|
+
Layout/EmptyLinesAroundBlockBody:
|
42
|
+
Exclude:
|
43
|
+
- 'spec/crm_formatter_spec.rb'
|
44
|
+
|
45
|
+
# Offense count: 2
|
46
|
+
# Cop supports --auto-correct.
|
47
|
+
# Configuration parameters: EnforcedStyle.
|
48
|
+
# SupportedStyles: empty_lines, empty_lines_except_namespace, empty_lines_special, no_empty_lines, beginning_only, ending_only
|
49
|
+
Layout/EmptyLinesAroundClassBody:
|
50
|
+
Exclude:
|
51
|
+
- 'lib/crm_formatter/wrap.rb'
|
52
|
+
|
53
|
+
# Offense count: 3
|
54
|
+
# Cop supports --auto-correct.
|
55
|
+
# Configuration parameters: EnforcedStyle.
|
56
|
+
# SupportedStyles: empty_lines, empty_lines_except_namespace, empty_lines_special, no_empty_lines
|
57
|
+
Layout/EmptyLinesAroundModuleBody:
|
58
|
+
Exclude:
|
59
|
+
- 'lib/crm_formatter.rb'
|
60
|
+
- 'lib/crm_formatter/wrap.rb'
|
61
|
+
|
62
|
+
# Offense count: 2
|
63
|
+
# Cop supports --auto-correct.
|
64
|
+
# Configuration parameters: .
|
65
|
+
# SupportedStyles: space, no_space
|
66
|
+
Layout/SpaceAroundEqualsInParameterDefault:
|
67
|
+
EnforcedStyle: no_space
|
68
|
+
|
69
|
+
# Offense count: 1
|
70
|
+
# Cop supports --auto-correct.
|
71
|
+
# Configuration parameters: AllowForAlignment.
|
72
|
+
Layout/SpaceAroundOperators:
|
73
|
+
Exclude:
|
74
|
+
- 'lib/crm_formatter/wrap.rb'
|
75
|
+
|
76
|
+
# Offense count: 1
|
77
|
+
# Cop supports --auto-correct.
|
78
|
+
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters.
|
79
|
+
# SupportedStyles: space, no_space
|
80
|
+
# SupportedStylesForEmptyBraces: space, no_space
|
81
|
+
Layout/SpaceInsideBlockBraces:
|
82
|
+
Exclude:
|
83
|
+
- 'Gemfile'
|
84
|
+
|
85
|
+
# Offense count: 6
|
86
|
+
# Cop supports --auto-correct.
|
87
|
+
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces.
|
88
|
+
# SupportedStyles: space, no_space, compact
|
89
|
+
# SupportedStylesForEmptyBraces: space, no_space
|
90
|
+
Layout/SpaceInsideHashLiteralBraces:
|
91
|
+
Exclude:
|
92
|
+
- 'lib/crm_formatter/wrap.rb'
|
93
|
+
|
94
|
+
# Offense count: 2
|
95
|
+
Lint/Debugger:
|
96
|
+
Exclude:
|
97
|
+
- 'lib/crm_formatter.rb'
|
98
|
+
|
99
|
+
# Offense count: 1
|
100
|
+
# Configuration parameters: IgnoreImplicitReferences.
|
101
|
+
Lint/ShadowedArgument:
|
102
|
+
Exclude:
|
103
|
+
- 'lib/crm_formatter/wrap.rb'
|
104
|
+
|
105
|
+
# Offense count: 1
|
106
|
+
# Cop supports --auto-correct.
|
107
|
+
# Configuration parameters: AllowUnusedKeywordArguments, IgnoreEmptyMethods.
|
108
|
+
Lint/UnusedMethodArgument:
|
109
|
+
Exclude:
|
110
|
+
- 'lib/crm_formatter/wrap.rb'
|
111
|
+
|
112
|
+
# Offense count: 4
|
113
|
+
Lint/UselessAssignment:
|
114
|
+
Exclude:
|
115
|
+
- 'lib/crm_formatter/wrap.rb'
|
116
|
+
|
117
|
+
# Offense count: 1
|
118
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
119
|
+
Metrics/BlockLength:
|
120
|
+
Max: 30
|
121
|
+
|
122
|
+
# Offense count: 1
|
123
|
+
# Configuration parameters: CountComments.
|
124
|
+
Metrics/MethodLength:
|
125
|
+
Max: 12
|
126
|
+
|
127
|
+
# Offense count: 1
|
128
|
+
Naming/AccessorMethodName:
|
129
|
+
Exclude:
|
130
|
+
- 'lib/crm_formatter/wrap.rb'
|
131
|
+
|
132
|
+
# Offense count: 3
|
133
|
+
Style/Documentation:
|
134
|
+
Exclude:
|
135
|
+
- 'spec/**/*'
|
136
|
+
- 'test/**/*'
|
137
|
+
- 'lib/crm_formatter.rb'
|
138
|
+
- 'lib/crm_formatter/dictionary.rb'
|
139
|
+
- 'lib/crm_formatter/wrap.rb'
|
140
|
+
|
141
|
+
# Offense count: 1
|
142
|
+
# Configuration parameters: MinBodyLength.
|
143
|
+
Style/GuardClause:
|
144
|
+
Exclude:
|
145
|
+
- 'lib/crm_formatter/wrap.rb'
|
146
|
+
|
147
|
+
# Offense count: 2
|
148
|
+
# Cop supports --auto-correct.
|
149
|
+
# Configuration parameters: EnforcedStyle, UseHashRocketsWithSymbolValues, PreferHashRocketsForNonAlnumEndingSymbols.
|
150
|
+
# SupportedStyles: ruby19, hash_rockets, no_mixed_keys, ruby19_no_mixed_keys
|
151
|
+
Style/HashSyntax:
|
152
|
+
Exclude:
|
153
|
+
- 'Rakefile'
|
154
|
+
|
155
|
+
# Offense count: 1
|
156
|
+
# Cop supports --auto-correct.
|
157
|
+
Style/MutableConstant:
|
158
|
+
Exclude:
|
159
|
+
- 'lib/crm_formatter/version.rb'
|
160
|
+
|
161
|
+
# Offense count: 2
|
162
|
+
# Cop supports --auto-correct.
|
163
|
+
# Configuration parameters: PreferredDelimiters.
|
164
|
+
Style/PercentLiteralDelimiters:
|
165
|
+
Exclude:
|
166
|
+
- 'lib/crm_formatter/wrap.rb'
|
167
|
+
|
168
|
+
# Offense count: 67
|
169
|
+
# Cop supports --auto-correct.
|
170
|
+
# Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
|
171
|
+
# SupportedStyles: single_quotes, double_quotes
|
172
|
+
Style/StringLiterals:
|
173
|
+
Exclude:
|
174
|
+
- 'Gemfile'
|
175
|
+
- 'Rakefile'
|
176
|
+
- 'bin/console'
|
177
|
+
- 'lib/crm_formatter.rb'
|
178
|
+
- 'lib/crm_formatter/version.rb'
|
179
|
+
- 'lib/crm_formatter/wrap.rb'
|
180
|
+
- 'menu.rb'
|
181
|
+
- 'spec/crm_formatter_spec.rb'
|
182
|
+
- 'spec/spec_helper.rb'
|
183
|
+
|
184
|
+
# Offense count: 7
|
185
|
+
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
|
186
|
+
# URISchemes: http, https
|
187
|
+
Metrics/LineLength:
|
188
|
+
Max: 549
|
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
2
|
+
|
3
|
+
## Our Pledge
|
4
|
+
|
5
|
+
In the interest of fostering an open and welcoming environment, we as
|
6
|
+
contributors and maintainers pledge to making participation in our project and
|
7
|
+
our community a harassment-free experience for everyone, regardless of age, body
|
8
|
+
size, disability, ethnicity, gender identity and expression, level of experience,
|
9
|
+
nationality, personal appearance, race, religion, or sexual identity and
|
10
|
+
orientation.
|
11
|
+
|
12
|
+
## Our Standards
|
13
|
+
|
14
|
+
Examples of behavior that contributes to creating a positive environment
|
15
|
+
include:
|
16
|
+
|
17
|
+
* Using welcoming and inclusive language
|
18
|
+
* Being respectful of differing viewpoints and experiences
|
19
|
+
* Gracefully accepting constructive criticism
|
20
|
+
* Focusing on what is best for the community
|
21
|
+
* Showing empathy towards other community members
|
22
|
+
|
23
|
+
Examples of unacceptable behavior by participants include:
|
24
|
+
|
25
|
+
* The use of sexualized language or imagery and unwelcome sexual attention or
|
26
|
+
advances
|
27
|
+
* Trolling, insulting/derogatory comments, and personal or political attacks
|
28
|
+
* Public or private harassment
|
29
|
+
* Publishing others' private information, such as a physical or electronic
|
30
|
+
address, without explicit permission
|
31
|
+
* Other conduct which could reasonably be considered inappropriate in a
|
32
|
+
professional setting
|
33
|
+
|
34
|
+
## Our Responsibilities
|
35
|
+
|
36
|
+
Project maintainers are responsible for clarifying the standards of acceptable
|
37
|
+
behavior and are expected to take appropriate and fair corrective action in
|
38
|
+
response to any instances of unacceptable behavior.
|
39
|
+
|
40
|
+
Project maintainers have the right and responsibility to remove, edit, or
|
41
|
+
reject comments, commits, code, wiki edits, issues, and other contributions
|
42
|
+
that are not aligned to this Code of Conduct, or to ban temporarily or
|
43
|
+
permanently any contributor for other behaviors that they deem inappropriate,
|
44
|
+
threatening, offensive, or harmful.
|
45
|
+
|
46
|
+
## Scope
|
47
|
+
|
48
|
+
This Code of Conduct applies both within project spaces and in public spaces
|
49
|
+
when an individual is representing the project or its community. Examples of
|
50
|
+
representing a project or community include using an official project e-mail
|
51
|
+
address, posting via an official social media account, or acting as an appointed
|
52
|
+
representative at an online or offline event. Representation of a project may be
|
53
|
+
further defined and clarified by project maintainers.
|
54
|
+
|
55
|
+
## Enforcement
|
56
|
+
|
57
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
58
|
+
reported by contacting the project team at 4rlm@protonmail.ch. All
|
59
|
+
complaints will be reviewed and investigated and will result in a response that
|
60
|
+
is deemed necessary and appropriate to the circumstances. The project team is
|
61
|
+
obligated to maintain confidentiality with regard to the reporter of an incident.
|
62
|
+
Further details of specific enforcement policies may be posted separately.
|
63
|
+
|
64
|
+
Project maintainers who do not follow or enforce the Code of Conduct in good
|
65
|
+
faith may face temporary or permanent repercussions as determined by other
|
66
|
+
members of the project's leadership.
|
67
|
+
|
68
|
+
## Attribution
|
69
|
+
|
70
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
|
71
|
+
available at [http://contributor-covenant.org/version/1/4][version]
|
72
|
+
|
73
|
+
[homepage]: http://contributor-covenant.org
|
74
|
+
[version]: http://contributor-covenant.org/version/1/4/
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2018 Adam Booth
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,172 +1,238 @@
|
|
1
1
|
|
2
|
-
#
|
2
|
+
# CRM Wrap
|
3
|
+
#### Efficiently Reformat, Normalize, and Scrub CRM Contact Data, such as Addresses, Phones and URLs.
|
3
4
|
|
4
|
-
|
5
|
+
CRM Wrap is perfect for curating high-volume enterprise-scale web scraping, and integrates well with Nokogiri, Mechanize, and asynchronous jobs via Delayed_job or Sidekiq, to name a few. Web scraping and harvesting often gather a lot of junk to sift through, presenting unexpected edge cases around each corner. CRM Wrap has been developed and refined during the past few years to focus on improving that task.
|
5
6
|
|
6
|
-
|
7
|
-
However, it also plays an integral role in routine functions of apps, like formatting, normalizing, and even scrubbing existing databases, and submitted form data before saving to the database; via model callbacks, such as `before_validation` or `before_save`.
|
7
|
+
It's also perfect for processing API data, Web Forms, and routine DB normalizing and scrubbing processes. Not only does it reformat Address, Phone, and Web data, it can also accept lists to scrub against, and then provides detailed reports about how each piece of data compares with your criteria lists.
|
8
8
|
|
9
|
-
|
10
|
-
However, **CRM Formatter** has been developed continuously for several years and is a reliable and integral part of a production CRM data verification app. The process of isolating the various modules into a consolidated open source gem has just recently begun, so documentation is still limited, but is frequently being added and refined.
|
9
|
+
The CRM Wrap Gem is currently in '--pre versioning', or 'beta mode', as the process of reorganizing these proprietary, production environment processes from their native app environment into this newly created open source gem is still under way. Formal tests in the gem environment are still on the way, as is the documentation. But the processes themselves have been very reliable and an integral part of a very large app dedicated to such services.
|
11
10
|
|
12
11
|
## Getting Started
|
13
|
-
|
12
|
+
CRM Wrap is compatible with Rails 4.2 and 5.0, 5.1 and 5.2 on Ruby 2.2 and later.
|
14
13
|
|
15
14
|
In your Gemfile add:
|
16
|
-
|
17
15
|
```
|
18
|
-
gem 'crm_formatter', '~> 1.0.
|
16
|
+
gem 'crm_formatter', '~> 1.0.8.pre.rc.1'
|
19
17
|
```
|
20
|
-
|
21
18
|
Or to install locally:
|
22
|
-
|
23
19
|
```
|
24
20
|
gem install crm_formatter --pre
|
25
21
|
```
|
26
|
-
|
27
22
|
## Usage
|
23
|
+
Using CRM Wrap in your app is very simple, and could be accessed from your app's concerns, helpers, lib, models, or services, but depends on the scope, location, and size of your application and server. For simple form submission validations the model callback is typically ideal. For database normalizing tasks the concerns, helpers, or lib is typically ideal. For long running processes like web scraping or high volume API calls, like Google, LinkedIn, or Twitter, the lib or services might be ideal (asynchronous multithreaded even better).
|
28
24
|
|
29
|
-
|
30
|
-
|
31
|
-
### I. Overview
|
32
|
-
|
33
|
-
#### 1. Access and Integration
|
34
|
-
##### Using **CRM Formatter** in your app is very simple, and could be accessed from your app's concerns, controllers, helpers, lib, models, or services, but depends on the scope, location, and size of your application and server.
|
35
|
-
* Simple form submission validations: model callback typically ideal.
|
36
|
-
* Database normalizing tasks: wrapper method in concerns, helpers, or lib typically ideal.
|
37
|
-
* Long running processes like web scraping or high volume APIs calls, like Google Linkedin, or Twitter: the lib or services might be ideal (multithreaded asynchronously even better)
|
38
|
-
|
39
|
-
#### 2. Hash Response
|
40
|
-
##### Formatted data will always be returned as a hash datatype the following key-value pairs:
|
41
|
-
* The originally submitted data as the first pair.
|
42
|
-
* Formatted data in the remaining pairs.
|
43
|
-
* A T/F boolean indicator pair regarding if the original and formatted data are different.
|
44
|
-
|
45
|
-
#### 3. Optional Arguments *OA*
|
46
|
-
##### A class can be instantiated with optional arguments *OA*.
|
47
|
-
* OA house the criteria by which you'd like to scrub your data.
|
48
|
-
* Each is either 'Pos' or 'Neg', for more accurate reporting of your scrubbing results.
|
49
|
-
* List of available Web OA is below, and each accepts data in the hash datatype, aka 'keyword-args'.
|
50
|
-
* For example, you might want to know which URLs contain 'twitter', 'facebook', or 'linkedin' either to focus on developing a list of business social media links, or perhaps you want to use such a list to better avoid such links.
|
51
|
-
* *OA is currently only available for the Web class.*
|
52
|
-
* *Address OA & Phone OA will be available in a future release.*
|
53
|
-
|
54
|
-
### II. Methods
|
55
|
-
##### CRM Formatter**'s top level module is `CRMFormatter` and contains the following three classes:
|
56
|
-
1. Address: `CRMFormatter::Address.new`
|
57
|
-
2. Phone: `CRMFormatter::Address.new`
|
58
|
-
3. Web: `CRMFormatter::Address.new`
|
59
|
-
|
60
|
-
###### Then assign the above to a variable name of your choosing.
|
61
|
-
`addr_formatter = CRMFormatter::Address.new`
|
62
|
-
`@addr_formatter = CRMFormatter::Address.new`
|
63
|
-
|
64
|
-
###### Web accepts optional arguments *OA* as a Hash (with Key-Value pairs)
|
65
|
-
Without OA: Instantiate normally if not using OA.
|
66
|
-
`web_formatter = CRMFormatter::Web.new`
|
67
|
-
|
68
|
-
With OA: Follow the steps to use Web OA:
|
69
|
-
1. Available Web OA and the required Key-Value naming and datatypes.
|
70
|
-
* Only list the OA K-V Pairs you're using. No need to list empty values. It's not all or nothing. These are empty to illustrate the expected datatypes.
|
71
|
-
|
72
|
-
Below is how the OA are received in the Web class at initialization.
|
73
|
-
**3. Web Examples at the very bottom has a very detailed example including how OA can be used.**
|
74
|
-
```
|
75
|
-
def initialize(args={})
|
76
|
-
@empty_oa = args.empty?
|
77
|
-
@pos_urls = args.fetch(:pos_urls, [])
|
78
|
-
@neg_urls = args.fetch(:neg_urls, [])
|
79
|
-
@pos_links = args.fetch(:pos_links, [])
|
80
|
-
@neg_links = args.fetch(:neg_links, [])
|
81
|
-
@pos_hrefs = args.fetch(:pos_hrefs, [])
|
82
|
-
@neg_hrefs = args.fetch(:neg_hrefs, [])
|
83
|
-
@pos_exts = args.fetch(:pos_exts, [])
|
84
|
-
@neg_exts = args.fetch(:neg_exts, [])
|
85
|
-
@min_length = args.fetch(:min_length, 2)
|
86
|
-
@max_length = args.fetch(:max_length, 100)
|
87
|
-
end
|
25
|
+
### Class Names
|
26
|
+
CrmFormatter contains three classes, which can be accessed like below with local or instance variables; you can name them anything you like.
|
88
27
|
```
|
28
|
+
adr_formatter = CrmFormatter::Address.new
|
29
|
+
@adr_formatter = CrmFormatter::Address.new
|
89
30
|
|
90
|
-
|
91
|
-
|
92
|
-
```
|
93
|
-
oa_args = { neg_urls: %w(approv insur invest loan quick rent repair),
|
94
|
-
neg_links: %w(buy call cash cheap click gas insta),
|
95
|
-
neg_hrefs: %w(after anounc apply approved blog buy call click),
|
96
|
-
neg_exts: %w(au ca edu es gov in ru uk us),
|
97
|
-
min_length: 0,
|
98
|
-
max_length: 30
|
99
|
-
}
|
31
|
+
ph_formatter = CrmFormatter::Phone.new
|
32
|
+
@ph_formatter = CrmFormatter::Phone.new
|
100
33
|
|
101
|
-
|
34
|
+
web_formatter = CrmFormatter::Web.new
|
35
|
+
@web_formatter = CrmFormatter::Web.new
|
102
36
|
```
|
103
37
|
|
104
|
-
|
105
|
-
|
106
|
-
`get_full_address()` takes a hash of address parts then runs each through their respective formatters, then also adds an additional feature of combining them into a long full address string, and indicates if there were any changes from the original version and newly formatted.
|
38
|
+
### Available Methods in Each Class
|
107
39
|
|
40
|
+
## Address Methods
|
41
|
+
These are the methods available to you. You can use them à la carte, for example if you just wanted to wrap all your states, or you could combine the entire address into `get_full_address()` which will run each of the related methods for you. It also adds an additional hash pair containing the full address as a single string. There is also an indicator pair to report if there were any changes from the original version to the newly formatted.
|
108
42
|
```
|
109
|
-
addr_formatter =
|
110
|
-
|
43
|
+
addr_formatter = CrmFormatter::Address.new
|
111
44
|
full_address_hash = {street: street, city: city, state: state, zip: zip}
|
112
|
-
|
113
45
|
addr_formatter.get_full_address(full_address_hash)
|
114
|
-
|
115
46
|
addr_formatter.format_street(street_string)
|
116
|
-
|
117
47
|
addr_formatter.format_city(city_string)
|
118
|
-
|
119
48
|
addr_formatter.format_state(state_string)
|
120
|
-
|
121
49
|
addr_formatter.format_zip(zip)
|
122
|
-
|
123
50
|
addr_formatter.format_full_address(adr = {})
|
124
|
-
|
125
51
|
addr_formatter.compare_versions(original, formatted)
|
126
|
-
|
127
52
|
```
|
128
53
|
|
129
54
|
#### Phone Methods
|
55
|
+
Phone only has two methods, with a subtle but important distinction between them. For simply formatting a known phone, use `format_phone` to convert to the normalized (555) 123-4567 wrap. Use `validate_phone` if either your phone data has a bunch of text and special characters to remove, or if you aren't even sure that it is a phone, as it will help determine if the phone number seems legitimate. If so, it then passes it along to `format_phone`.
|
56
|
+
```
|
57
|
+
ph_formatter = CrmFormatter::Phone.new
|
58
|
+
ph_formatter.validate_phone(phone)
|
59
|
+
ph_formatter.format_phone(phone)
|
60
|
+
```
|
61
|
+
|
62
|
+
#### Web Methods
|
63
|
+
The examples on this README are from `format_url` method. The others are for web scraping, which will be documented in the near future.
|
64
|
+
```
|
65
|
+
web_formatter = CrmFormatter::Web.new
|
66
|
+
web_formatter.format_url(url)
|
67
|
+
web_formatter.extract_path(url_path)
|
68
|
+
web_formatter.remove_invalid_links(link)
|
69
|
+
web_formatter.remove_invalid_hrefs(href)
|
70
|
+
web_formatter.convert_to_scheme_host(url)
|
71
|
+
```
|
130
72
|
|
131
|
-
|
73
|
+
## Examples
|
74
|
+
#### Below are two examples using the Web `format_url(url)` method:
|
132
75
|
|
76
|
+
### Example 1: 6 Example URLs Submitted:
|
77
|
+
Custom Method to Query URLs
|
78
|
+
```
|
79
|
+
def self.get_urls
|
80
|
+
urls = %w(website.com website.business.site website website.fake website.fake.com website.com.fake)
|
81
|
+
end
|
82
|
+
```
|
83
|
+
Custom Wrapper Method
|
84
|
+
```
|
85
|
+
def self.run_webs
|
86
|
+
web = CrmFormatter::Web.new
|
87
|
+
formatted_url_hashes = get_urls.map do |url|
|
88
|
+
url_hash = web.format_url(url)
|
89
|
+
end
|
90
|
+
end
|
91
|
+
```
|
92
|
+
Results as Hash: 3/6 Reformatted due to invalid or no url extensions. 3 Reformatted and Normalized with `http://www.`
|
93
|
+
URL Extensions, **.com, .net, .fake** cross referenced with official IANA list.
|
94
|
+
```
|
95
|
+
[ {:reformatted=>true, :url_path=>"website.com", :formatted_url=>"http://www.website.com", :neg=>[], :pos=>[]},
|
96
|
+
{:reformatted=>false, :url_path=>"website.business.site", :formatted_url=>nil, :neg=>["error: ext.valid > 1 [business, site]"], :pos=>[]}, {:reformatted=>false, :url_path=>"website", :formatted_url=>nil, :neg=>["error: ext.none"], :pos=>[]},
|
97
|
+
{:reformatted=>false, :url_path=>"website.fake", :formatted_url=>nil, :neg=>["error: ext.invalid [fake]"], :pos=>[]},
|
98
|
+
{:reformatted=>true, :url_path=>"website.fake.com", :formatted_url=>"http://www.website.com", :neg=>[], :pos=>[]},
|
99
|
+
{:reformatted=>true, :url_path=>"website.com.fake", :formatted_url=>"http://www.website.com", :neg=>[], :pos=>[]}
|
100
|
+
]
|
133
101
|
```
|
134
|
-
ph_formatter = CRMFormatter::Phone.new
|
135
102
|
|
136
|
-
|
103
|
+
### Example 2: 6 Real URLs with Scrubbing Feature, but same configuration as above:
|
104
|
+
**Intentionally partially obfuscated**
|
105
|
+
```
|
106
|
+
urls = %w(approvXXXutosales.org autXXXartfinance.com leXXXummitautorepair.net melXXXtoyota.com norXXXastacura.com XXXmazda.com)
|
107
|
+
```
|
108
|
+
These results list 'neg' and 'pos', which are the criteria I was scrubbing against. I wanted to find the URLs of franchise auto dealers and exclude ancillary URLs.
|
109
|
+
```
|
110
|
+
[{:reformatted=>true, :url_path=>"approvXXXutosales.org", :formatted_url=>"http://www.approvXXXutosales.org", :neg=>["neg_urls: approv"], :pos=>[]},
|
111
|
+
{:reformatted=>true, :url_path=>"autXXXartfinance.com", :formatted_url=>"http://www.autXXXartfinance.com", :neg=>["neg_urls: financ"], :pos=>["pos_urls: smart"]},
|
112
|
+
{:reformatted=>true, :url_path=>"leXXXummitautorepair.net", :formatted_url=>"http://www.leXXXummitautorepair.net", :neg=>["neg_urls: repair"], :pos=>[]},
|
113
|
+
{:reformatted=>true, :url_path=>"melXXXtoyota.com", :formatted_url=>"http://www.melXXXtoyota.com", :neg=>[], :pos=>["pos_urls: toyota"]},
|
114
|
+
{:reformatted=>true, :url_path=>"norXXXastacura.com", :formatted_url=>"http://www.norXXXastacura.com", :neg=>[], :pos=>["pos_urls: acura"]},
|
115
|
+
{:reformatted=>true, :url_path=>"XXXmazda.com", :formatted_url=>"http://www.XXXmazda.com", :neg=>[], :pos=>["pos_urls: mazda"]}
|
116
|
+
]
|
117
|
+
```
|
137
118
|
|
138
|
-
|
119
|
+
## Quick Setup Guide
|
139
120
|
|
121
|
+
#### Create a Wrapper with a custom Class and Method(s)
|
122
|
+
This is just one of several ways to configure. If you only need the gem for formatting form data, you could just create a callback method in your model, but to scrub a database or process API and Harvested data, you'll want a dedicated process so you can manage the queue, criteria, and results. If you don't already have one, this example will show you how. Concerns, Helpers and Models might be fine for smaller tasks, but for heavier tasks Lib and Services are ideal, but depends on your specifications.
|
123
|
+
```
|
124
|
+
# /app/lib/start_crm.rb
|
140
125
|
```
|
126
|
+
```
|
127
|
+
class StartCrm
|
128
|
+
def initialize
|
129
|
+
@web = CrmFormatter::Web.new
|
130
|
+
end
|
141
131
|
|
142
|
-
|
132
|
+
def run_webs
|
133
|
+
formatted_url_hashes = urls.map do |url|
|
134
|
+
url_hash = @web.format_url(url)
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
```
|
139
|
+
You may need to edit your application config file to recognize your new class.
|
140
|
+
```
|
141
|
+
#/app/config/application.rb
|
143
142
|
|
143
|
+
config.eager_load_paths << Rails.root.join('lib/**')
|
144
|
+
config.eager_load_paths += Dir["#{config.root}/lib/**/"]
|
145
|
+
```
|
146
|
+
#### Run in Rails Console
|
147
|
+
In this example, we'll run it in Rails Console like below, but you could also create a Rake Task and integrate it with a scheduled Cron Job. You could also run the process through your controller actions in a GUI. If accessing through the front end, you might want to do it asynchronously with gems like Delayed_job or Sidekiq so you can free-up your controllers and prevent your front end from freezing while waiting for the job to complete; if running very large tasks.
|
148
|
+
```
|
149
|
+
2.5.1 :001 > StartCrm.new.run_webs
|
144
150
|
```
|
145
|
-
|
151
|
+
#### Instance vs Class Methods in your Wrapper
|
152
|
+
In the above example, `run_webs` is an instance method, but a class method `self.run_webs` could work well too, like the example below. At least in the early stages, this is a little easier if you keep running it in Rails C, because not requiring initializing means less to type to call it. Next you could setup your class with various methods to assist your process, like so:
|
153
|
+
```
|
154
|
+
class StartCrm
|
155
|
+
def self.run_webs
|
156
|
+
web = CrmFormatter::Web.new
|
146
157
|
|
147
|
-
|
158
|
+
formatted_url_hashes = query_accounts.map do |act|
|
159
|
+
url_hsh = web.format_url(act.url)
|
148
160
|
|
149
|
-
|
161
|
+
if url_hsh[:reformatted]
|
150
162
|
|
151
|
-
|
163
|
+
act_hsh = { url: url_hsh[:formatted_url],
|
164
|
+
url_sts: url_hsh[:formatted_url],
|
165
|
+
scrub_date: Time.now
|
166
|
+
}
|
167
|
+
else
|
168
|
+
act_hsh = { scrub_date: Time.now }
|
169
|
+
end
|
152
170
|
|
153
|
-
|
171
|
+
act.update(act_hsh)
|
172
|
+
end
|
173
|
+
end
|
154
174
|
|
155
|
-
|
175
|
+
def self.query_accounts
|
176
|
+
accounts = Account.where(url_sts: 'Invalid').limit(50)
|
177
|
+
end
|
178
|
+
end
|
179
|
+
```
|
156
180
|
|
181
|
+
#### Data Response in a Hash
|
182
|
+
CRM Wrap returns data as a hash, which includes your original unaltered data you submitted, the formatted data, a T/F boolean indicator regarding if the original and formatted data are different, and for some methods, negs and pos regarding your criteria to scrub against. In the above example, the returned data from each submitted url would resemble the one below.
|
183
|
+
```
|
184
|
+
# format_url method returns data like below this example...
|
185
|
+
# url_hash = {:reformatted=>false,
|
186
|
+
:url_path=>"https://www.steXXXXXXmitsubishiserviceandpartscenter.com",
|
187
|
+
:formatted_url=>"https://www.steXXXXXXmitsubishiserviceandpartscenter.com",
|
188
|
+
:neg=>["neg_urls: parts, rv, service"],
|
189
|
+
:pos=>["pos_urls: mitsubishi"]
|
190
|
+
}
|
157
191
|
```
|
158
192
|
|
159
|
-
|
193
|
+
#### Optional Arguments OA
|
194
|
+
A class can be instantiated with optional arguments 'OA', to load your criteria to scrub against. Only list the OA K-V Pairs you're using. No need to list empty values. It's not all or nothing. These are empty to illustrate the expected datatypes.
|
195
|
+
**OA is currently only available for the Web class, but will soon be available in the Address & Phone classes.**
|
196
|
+
|
197
|
+
Below is how the OA are received in the Web class at initialization.
|
198
|
+
```
|
199
|
+
def initialize(args={})
|
200
|
+
@empty_oa = args.empty?
|
201
|
+
@pos_urls = args.fetch(:pos_urls, [])
|
202
|
+
@neg_urls = args.fetch(:neg_urls, [])
|
203
|
+
@pos_links = args.fetch(:pos_links, [])
|
204
|
+
@neg_links = args.fetch(:neg_links, [])
|
205
|
+
@pos_hrefs = args.fetch(:pos_hrefs, [])
|
206
|
+
@neg_hrefs = args.fetch(:neg_hrefs, [])
|
207
|
+
@pos_exts = args.fetch(:pos_exts, [])
|
208
|
+
@neg_exts = args.fetch(:neg_exts, [])
|
209
|
+
@min_length = args.fetch(:min_length, 2)
|
210
|
+
@max_length = args.fetch(:max_length, 100)
|
211
|
+
end
|
212
|
+
```
|
213
|
+
|
214
|
+
Below is the syntax for how to use OA. Positive and negative options are available; they function essentially the same way, but allow additional options for scrubbing data.
|
215
|
+
```
|
216
|
+
oa_args = { neg_urls: %w(approv insur invest loan quick rent repair),
|
217
|
+
neg_links: %w(buy call cash cheap click gas insta),
|
218
|
+
neg_hrefs: %w(after anounc apply approved blog buy call click),
|
219
|
+
neg_exts: %w(au ca edu es gov in ru uk us),
|
220
|
+
min_length: 0,
|
221
|
+
max_length: 30
|
222
|
+
}
|
223
|
+
@web_formatter = CrmFormatter::Web.new(oa_args)
|
224
|
+
```
|
225
|
+
|
226
|
+
### III. Detailed Examples
|
160
227
|
Some of the examples are excessively verbose to help illustrate the datatypes and processes. Here are a few guidelines and tips:
|
161
|
-
**3. Web Examples at the very bottom is the most detailed and recent. It might be a good place to start.**
|
162
|
-
*These are just examples below, not strict usage guides ...*
|
163
228
|
|
164
|
-
|
229
|
+
*These are just examples, not strict usage guides ...*
|
165
230
|
|
231
|
+
#### 1. Address Examples
|
166
232
|
```
|
167
233
|
def self.run_adrs
|
168
234
|
|
169
|
-
crm_address_formatter =
|
235
|
+
crm_address_formatter = CrmFormatter::Address.new
|
170
236
|
|
171
237
|
contacts = Contact.where.not(full_address: nil)
|
172
238
|
|
@@ -184,11 +250,9 @@ end
|
|
184
250
|
```
|
185
251
|
|
186
252
|
#### 2. Phone Examples
|
187
|
-
|
188
|
-
In the phone example, format_all_phone_in_my_db could be a custom wrapper method, which when called by Rails C or from a front end GUI process, could grab all phones in db meeting certain criteria to be scrubbed. The results will always be in hash format, such as below.... phone_hash
|
189
|
-
|
253
|
+
In the phone example, format_all_phone_in_my_db could be a custom wrapper method which, when called from Rails C or from a front-end GUI process, could grab all phones in the db meeting certain criteria to be scrubbed. The results will always be in hash format, such as phone_hash below.
|
190
254
|
```
|
191
|
-
@crm_phone =
|
255
|
+
@crm_phone = CrmFormatter::Phone.new
|
192
256
|
|
193
257
|
def self.format_all_phone_in_my_db
|
194
258
|
phones_from_contacts = Contacts.where.not(phone: nil)
|
@@ -199,15 +263,11 @@ def self.format_all_phone_in_my_db
|
|
199
263
|
|
200
264
|
end
|
201
265
|
|
202
|
-
phone_hash = { phone: 555-123-4567,
|
266
|
+
phone_hash = { phone: '555-123-4567', phone_f: '(555) 123-4567', phone_status: true }
|
203
267
|
```
|
204
268
|
|
205
269
|
#### 3. Web Examples
|
206
|
-
|
207
|
-
The steps below will show you an option for how you could integrate larger processes in your app.
|
208
|
-
1. Create a wrapper method you can call from an action or Rails C. In this example, a new class was also created in Lib for that purpose, as there could be related methods to create.
|
209
|
-
* These examples only include `CRMFormatter::Web.new.format_url(url)` method. There are several additional methods available to you. Documentation is on the way, but in the mean time, try out the below example, then play around with the others too.
|
210
|
-
|
270
|
+
The steps below will show you an option for how you could integrate larger processes in your app. Create a wrapper method you can call from an action or Rails C. In this example, a new class was also created in Lib for that purpose, as there could be related methods to create.
|
211
271
|
```
|
212
272
|
# /app/lib/start_crm.rb
|
213
273
|
|
@@ -216,7 +276,7 @@ class StartCrm
|
|
216
276
|
##Rails C: StartCrm.run_webs
|
217
277
|
def self.run_webs
|
218
278
|
oa_args = get_args
|
219
|
-
web =
|
279
|
+
web = CrmFormatter::Web.new(oa_args)
|
220
280
|
|
221
281
|
formatted_url_hashes = get_urls.map do |url|
|
222
282
|
url_hash = web.format_url(url)
|
@@ -227,15 +287,14 @@ class StartCrm
|
|
227
287
|
|
228
288
|
end
|
229
289
|
```
|
230
|
-
|
231
|
-
|
290
|
+
Application Config
|
232
291
|
```
|
233
292
|
#/app/config/application.rb
|
234
293
|
|
235
294
|
config.eager_load_paths << Rails.root.join('lib/**')
|
236
295
|
config.eager_load_paths += Dir["#{config.root}/lib/**/"]
|
237
296
|
```
|
238
|
-
|
297
|
+
Create your db query or put together a list of URLs to process, along with any OA to include. The below example is very verbose, but designed to be helpful. In reality, you might have various criteria saved in the db rather than writing it out.
|
239
298
|
In this example, we have auto dealer URLs. In this process, we're focusing on franchise dealers.
|
240
299
|
```
|
241
300
|
def self.get_args
|
@@ -251,48 +310,46 @@ def self.get_urls
|
|
251
310
|
urls = ["https://www.stevXXXXXXmitsubishiserviceandpartscenter.com", "https://www.perXXXXXXchryslerjeepcenterville.com", "http://www.peXXXXXXchryslerjeepcenterville.com", "http://www.colXXXXXXchryslerdodgejeepram.com"]
|
252
311
|
end
|
253
312
|
```
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
* `:is_reformatted` indicates T/F if `:url_path` and `:formatted_url` differ. If False, then it means they are the same, or the `:url_path` had significant errors which prevented it from being formatted, thus `:formatted_url` would be nil in such a case. The reality is that you might have some URLs that are so far off that they can't be reliably reformatted, so it is better to only let them pass if we are confident that they are reliable.
|
261
|
-
|
262
|
-
* `:url_path` is the url originally submitted by the client. It can include directory links on the end too, '/careers/', '/about-us/', etc.
|
263
|
-
|
264
|
-
* `:formatted_url` is the formatted version of `:url_path`. It will be stripped of additional paths, '/deals/', '/staff/', etc. Also, people often omit 'http://' and 'www' in CRMs. This can sometimes cause errors for users or mechanized web scrapers, so those will always be included to ensure consistency. In our production app we follow up the formatting with URL redirect following, for which our configuration requires the entire path, so it will always be included. The redirect-following gem is already being worked on and will be released as an additional gem shortly.
|
265
|
-
|
266
|
-
* `:neg` is an array of all the errors and negative, undesirable criteria to scrub against. If you include the criteria in OA `neg_urls:`, like above, it will automatically scrub and report. Regardless, any errors will also be included in there. So, if the url was not ultimately formatted, there will be details regarding why in `:neg`.
|
267
|
-
|
268
|
-
* `:pos` is the opposite, which highlights positive criteria you might be looking for. It too is available in OA via `pos_urls:`, like above.
|
269
|
-
|
313
|
+
Run your class and wrapper method in Rails C. By creating the wrapper method, you have set up the entire process to run like a runner. In reality, you might have several different criteria accessible from a GUI or even running in Cron Jobs.
|
314
|
+
```
|
315
|
+
2.5.1 :001 > StartCrm.run_webs
|
316
|
+
```
|
317
|
+
Results are always in a Hash, like below. The URLs are slightly obfuscated out of respect (it's not a bug). These examples come from a large DB that is processed on a loop 24/7, reaching each organization about once a week, so the data is already well up to date and there aren't any big changes below. There are still a few things to point out, which follow the code example.
|
270
318
|
```
|
271
|
-
[ {:
|
319
|
+
[ {:reformatted=>false,
|
272
320
|
:url_path=>"https://www.steXXXXXXmitsubishiserviceandpartscenter.com",
|
273
321
|
:formatted_url=>"https://www.steXXXXXXmitsubishiserviceandpartscenter.com",
|
274
322
|
:neg=>["neg_urls: parts, rv, service"],
|
275
323
|
:pos=>["pos_urls: mitsubishi"]},
|
276
324
|
|
277
|
-
{:
|
325
|
+
{:reformatted=>false,
|
278
326
|
:url_path=>"https://www.perXXXXXXchryslerjeepcenterville.com",
|
279
327
|
:formatted_url=>"https://www.perXXXXXXchryslerjeepcenterville.com",
|
280
328
|
:neg=>["neg_urls: rv"],
|
281
329
|
:pos=>["pos_urls: chrysler, jeep"]},
|
282
330
|
|
283
|
-
{:
|
331
|
+
{:reformatted=>false,
|
284
332
|
:url_path=>"http://www.pXXXXXXchryslerjeepcenterville.com",
|
285
333
|
:formatted_url=>"http://www.XXXXXXechryslerjeepcenterville.com",
|
286
334
|
:neg=>["neg_urls: rv"],
|
287
335
|
:pos=>["pos_urls: chrysler, jeep"]},
|
288
336
|
|
289
|
-
{:
|
337
|
+
{:reformatted=>false,
|
290
338
|
:url_path=>"http://www.colXXXXXXchryslerdodgejeepram.com",
|
291
339
|
:formatted_url=>"http://www.colXXXXXXchryslerdodgejeepram.com",
|
292
340
|
:neg=>["neg_urls: rv"],
|
293
341
|
:pos=>["pos_urls: chrysler, dodge, jeep, ram"]}
|
294
342
|
]
|
295
343
|
```
|
344
|
+
`:reformatted` indicates T/F if `:url_path` and `:formatted_url` differ. If False, then it means they are the same, or the `:url_path` had significant errors which prevented it from being formatted, thus `:formatted_url` would be nil in such a case. The reality is that you might have some URLs that are so far off that they can't be reliably reformatted, so it is better to only let them pass if we are confident that they are reliable.
|
345
|
+
|
346
|
+
`:url_path` is the url originally submitted by the client. It can include directory links on the end too, '/careers/', '/about-us/', etc.
|
347
|
+
|
348
|
+
`:formatted_url` is the formatted version of `:url_path`. It will be stripped of additional paths, '/deals/', '/staff/', etc. Also, people often omit 'http://' and 'www' in CRMs. This can sometimes cause errors for users or mechanized web scrapers, so those will always be included to ensure consistency. In our production app we follow up the formatting with URL redirect following, for which our configuration requires the entire path, so it will always be included. The redirect-following gem is already being worked on and will be released as an additional gem shortly.
|
349
|
+
|
350
|
+
`:neg` is an array of all the errors and negative, undesirable criteria to scrub against. If you include the criteria in OA `neg_urls:`, like above, it will automatically scrub and report. Regardless, any errors will also be included in there. So, if the url was not ultimately formatted, there will be details regarding why in `:neg`.
|
351
|
+
|
352
|
+
`:pos` is the opposite, which highlights positive criteria you might be looking for. It too is available in OA via `pos_urls:`, like above.
|
296
353
|
|
297
354
|
|
298
355
|
## Author
|