base_scraper_service 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 75c1ba4ef16ea7a756bae017e95051e3c572c55d169da8d4cb7331bdf12c974b
|
4
|
+
data.tar.gz: fe704e0af988b43de06b235bdee1761a9726cbb153a0b8cbdc22dab770ae1960
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca95405e99c42f18556b91c8b48db8c173c10f2305fb55465644c85c408b2492216978329c47f6cc4bd5273c36c2e8811b5362dc66bc3fbc46ba782079d0a14a
|
7
|
+
data.tar.gz: 51d1fed725886178366a038cae7a5386dd3bc389e490f4a537f6c91aaf39a441fe39d99932a114d22aaa3dbd11958eb2df0fe0143d5a6d18f177588974f7ba36
|
@@ -1,16 +1,6 @@
|
|
1
|
-
require 'mechanize'
|
2
|
-
require 'nokogiri'
|
3
|
-
require 'thread'
|
4
|
-
require 'fileutils'
|
5
|
-
require 'logger'
|
6
|
-
require "dotenv"
|
7
1
|
require 'csv'
|
8
|
-
require 'date'
|
9
2
|
require 'active_support/time'
|
10
3
|
require 'library_stdnums'
|
11
|
-
require 'base_scraper_service'
|
12
|
-
require_relative 'user_agent'
|
13
|
-
require_relative 'agent_object'
|
14
4
|
|
15
5
|
module BaseScraper
|
16
6
|
class Service
|
@@ -9,29 +9,34 @@ module BaseScraper
|
|
9
9
|
# 1- return state abbreviation if matched with state abbreviations
|
10
10
|
# 2- return state abbreviation if matched with state name
|
11
11
|
# 3- otherwise return empty string
|
12
|
-
state = us_states_hash.keys.select { |state_abbreviation| @address.upcase.include?(state_abbreviation) }.first
|
13
|
-
return state if state.present? && @address.length != 3
|
14
|
-
state = us_states_hash.select { |_, state| @address.include?(state) }.first&.first
|
15
12
|
|
16
|
-
|
13
|
+
@state = us_states_hash.keys.select { |state_abbreviation| @address.upcase.include?(state_abbreviation) }.first
|
14
|
+
return @state if @state.present? && @address.length != 3
|
15
|
+
@state = us_states_hash.select { |_, state| @address.include?(state) }.first&.first
|
16
|
+
|
17
|
+
return @state.to_s
|
18
|
+
|
17
19
|
end
|
18
20
|
|
21
|
+
|
22
|
+
|
19
23
|
def extract_country
|
20
24
|
# 1- return country abbreviation if matched with country abbreviations
|
21
25
|
# 2- return country abbreviation if matched with country name
|
22
26
|
# 3- return USA if address contains a USA state
|
23
27
|
# 4- otherwise return empty string
|
28
|
+
|
24
29
|
country = countries_hash.keys.select { |country_abbreviation| @address.include?(country_abbreviation) }.first
|
25
|
-
puts country, @address
|
26
30
|
return country if country.present?
|
27
|
-
country = countries_hash.select { |_, country_name| @address.include?(country_name) }.first&.first
|
31
|
+
country = countries_hash.select { |_, country_name| @address.downcase.include?(country_name.downcase) }.first&.first
|
28
32
|
return country if country.present?
|
29
33
|
|
30
|
-
return 'USA' if
|
34
|
+
return 'USA' if @state.present?
|
31
35
|
|
32
36
|
return country.to_s
|
33
37
|
end
|
34
38
|
|
39
|
+
|
35
40
|
private
|
36
41
|
|
37
42
|
def us_states_hash
|
@@ -327,7 +332,7 @@ module BaseScraper
|
|
327
332
|
'ARE' => 'United Arab Emirates',
|
328
333
|
'GBR' => 'United Kingdom of Great Britain and Northern Ireland',
|
329
334
|
'UMI' => 'United States Minor Outlying Islands',
|
330
|
-
'USA' => 'United States
|
335
|
+
'USA' => 'United States',
|
331
336
|
'URY' => 'Uruguay',
|
332
337
|
'UZB' => 'Uzbekistan',
|
333
338
|
'VUT' => 'Vanuatu',
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: base_scraper_service
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Qbatch
|
@@ -38,76 +38,6 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: nokogiri
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: thread
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: fileutils
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :runtime
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: logger
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :runtime
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: date
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - ">="
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
104
|
-
type: :runtime
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - ">="
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: '0'
|
111
41
|
- !ruby/object:Gem::Dependency
|
112
42
|
name: activesupport
|
113
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -122,20 +52,6 @@ dependencies:
|
|
122
52
|
- - ">="
|
123
53
|
- !ruby/object:Gem::Version
|
124
54
|
version: '0'
|
125
|
-
- !ruby/object:Gem::Dependency
|
126
|
-
name: dotenv
|
127
|
-
requirement: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - ">="
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: '0'
|
132
|
-
type: :runtime
|
133
|
-
prerelease: false
|
134
|
-
version_requirements: !ruby/object:Gem::Requirement
|
135
|
-
requirements:
|
136
|
-
- - ">="
|
137
|
-
- !ruby/object:Gem::Version
|
138
|
-
version: '0'
|
139
55
|
- !ruby/object:Gem::Dependency
|
140
56
|
name: csv
|
141
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,20 +66,6 @@ dependencies:
|
|
150
66
|
- - ">="
|
151
67
|
- !ruby/object:Gem::Version
|
152
68
|
version: '0'
|
153
|
-
- !ruby/object:Gem::Dependency
|
154
|
-
name: json
|
155
|
-
requirement: !ruby/object:Gem::Requirement
|
156
|
-
requirements:
|
157
|
-
- - ">="
|
158
|
-
- !ruby/object:Gem::Version
|
159
|
-
version: '0'
|
160
|
-
type: :runtime
|
161
|
-
prerelease: false
|
162
|
-
version_requirements: !ruby/object:Gem::Requirement
|
163
|
-
requirements:
|
164
|
-
- - ">="
|
165
|
-
- !ruby/object:Gem::Version
|
166
|
-
version: '0'
|
167
69
|
- !ruby/object:Gem::Dependency
|
168
70
|
name: library_stdnums
|
169
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -178,20 +80,6 @@ dependencies:
|
|
178
80
|
- - ">="
|
179
81
|
- !ruby/object:Gem::Version
|
180
82
|
version: '0'
|
181
|
-
- !ruby/object:Gem::Dependency
|
182
|
-
name: shotgun
|
183
|
-
requirement: !ruby/object:Gem::Requirement
|
184
|
-
requirements:
|
185
|
-
- - ">="
|
186
|
-
- !ruby/object:Gem::Version
|
187
|
-
version: '0'
|
188
|
-
type: :runtime
|
189
|
-
prerelease: false
|
190
|
-
version_requirements: !ruby/object:Gem::Requirement
|
191
|
-
requirements:
|
192
|
-
- - ">="
|
193
|
-
- !ruby/object:Gem::Version
|
194
|
-
version: '0'
|
195
83
|
- !ruby/object:Gem::Dependency
|
196
84
|
name: puma
|
197
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -206,20 +94,6 @@ dependencies:
|
|
206
94
|
- - ">="
|
207
95
|
- !ruby/object:Gem::Version
|
208
96
|
version: '0'
|
209
|
-
- !ruby/object:Gem::Dependency
|
210
|
-
name: byebug
|
211
|
-
requirement: !ruby/object:Gem::Requirement
|
212
|
-
requirements:
|
213
|
-
- - ">="
|
214
|
-
- !ruby/object:Gem::Version
|
215
|
-
version: '0'
|
216
|
-
type: :runtime
|
217
|
-
prerelease: false
|
218
|
-
version_requirements: !ruby/object:Gem::Requirement
|
219
|
-
requirements:
|
220
|
-
- - ">="
|
221
|
-
- !ruby/object:Gem::Version
|
222
|
-
version: '0'
|
223
97
|
description: Base scraper service to handle multiple services
|
224
98
|
email:
|
225
99
|
executables: []
|