lcbo 1.2.0 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +4 -0
- data/lib/lcbo/pages/store_page.rb +53 -88
- data/lib/lcbo/version.rb +1 -1
- data/spec/pages/store_pages/1.html +433 -190
- data/spec/pages/store_pages/2.html +431 -189
- data/spec/pages/store_pages.yml +59 -34
- metadata +10 -10
data/CHANGELOG.md
CHANGED
@@ -7,28 +7,18 @@ module LCBO
|
|
7
7
|
|
8
8
|
uri 'http://www.lcbo.com/lcbo-ear/jsp/storeinfo.jsp?STORE={id}&language=EN'
|
9
9
|
|
10
|
-
|
11
|
-
monday
|
12
|
-
tuesday
|
13
|
-
wednesday
|
14
|
-
thursday
|
15
|
-
friday
|
16
|
-
saturday
|
17
|
-
sunday ]
|
18
|
-
|
19
|
-
DETAIL_FIELDS = {
|
10
|
+
FEATURE_FIELDS = {
|
20
11
|
:has_wheelchair_accessability => 'wheelchair',
|
21
12
|
:has_bilingual_services => 'bilingual',
|
22
13
|
:has_product_consultant => 'consultant',
|
23
14
|
:has_tasting_bar => 'tasting',
|
24
15
|
:has_beer_cold_room => 'cold',
|
25
16
|
:has_special_occasion_permits => 'permits',
|
26
|
-
:has_vintages_corner => '
|
17
|
+
:has_vintages_corner => 'corner',
|
27
18
|
:has_parking => 'parking',
|
28
19
|
:has_transit_access => 'transit' }
|
29
20
|
|
30
21
|
on :before_parse, :verify_store_returned
|
31
|
-
on :after_parse, :verify_node_count
|
32
22
|
on :after_parse, :verify_telephone_number
|
33
23
|
|
34
24
|
emits :id do
|
@@ -36,7 +26,7 @@ module LCBO
|
|
36
26
|
end
|
37
27
|
|
38
28
|
emits :name do
|
39
|
-
CrawlKit::TitleCaseHelper[
|
29
|
+
CrawlKit::TitleCaseHelper[doc.css('.infoWindowTitle')[0].content.strip]
|
40
30
|
end
|
41
31
|
|
42
32
|
emits :tags do
|
@@ -50,7 +40,7 @@ module LCBO
|
|
50
40
|
end
|
51
41
|
|
52
42
|
emits :address_line_1 do
|
53
|
-
data = info_nodes[2].content.strip
|
43
|
+
data = info_nodes[2].content.strip
|
54
44
|
unless data
|
55
45
|
raise CrawlKit::MalformedError,
|
56
46
|
"unable to locate address for store #{idid}"
|
@@ -59,134 +49,116 @@ module LCBO
|
|
59
49
|
end
|
60
50
|
|
61
51
|
emits :address_line_2 do
|
62
|
-
data = info_nodes[
|
63
|
-
CrawlKit::TitleCaseHelper[data
|
52
|
+
data = info_nodes[3].content.strip
|
53
|
+
CrawlKit::TitleCaseHelper[data] if data != ''
|
64
54
|
end
|
65
55
|
|
66
56
|
emits :city do
|
67
|
-
|
68
|
-
|
57
|
+
pos = get_info_node_offset(4)
|
58
|
+
data = info_nodes[pos].content.strip.split(',')[0]
|
59
|
+
CrawlKit::TitleCaseHelper[data.strip] if data
|
69
60
|
end
|
70
61
|
|
71
62
|
emits :postal_code do
|
72
|
-
|
63
|
+
pos = get_info_node_offset(4)
|
64
|
+
data = info_nodes[pos].content.strip.split(',')[1]
|
73
65
|
unless data
|
74
66
|
raise CrawlKit::MalformedError,
|
75
67
|
"unable to locate postal code for store #{id}"
|
76
68
|
end
|
77
|
-
data.
|
69
|
+
data.strip.upcase
|
78
70
|
end
|
79
71
|
|
80
72
|
emits :telephone do
|
73
|
+
pos = get_info_node_offset(6)
|
81
74
|
CrawlKit::PhoneHelper[
|
82
|
-
info_nodes[
|
83
|
-
gsub(/[\n\r\t]+/, ' ').
|
84
|
-
gsub('Telephone:', '').
|
85
|
-
strip
|
75
|
+
info_nodes[pos].content.sub('Telephone:', '').strip
|
86
76
|
]
|
87
77
|
end
|
88
78
|
|
89
79
|
emits :fax do
|
90
80
|
if has_fax?
|
81
|
+
pos = (info_nodes_count - 1)
|
91
82
|
CrawlKit::PhoneHelper[
|
92
|
-
info_nodes[
|
83
|
+
info_nodes[pos].content.sub('Fax:', '').strip
|
93
84
|
]
|
94
85
|
end
|
95
86
|
end
|
96
87
|
|
97
88
|
emits :latitude do
|
98
|
-
|
89
|
+
node = doc.css('#latitude').first
|
90
|
+
node ? node[:value].to_f : nil
|
99
91
|
end
|
100
92
|
|
101
93
|
emits :longitude do
|
102
|
-
|
94
|
+
node = doc.css('#longitude').first
|
95
|
+
node ? node[:value].to_f : nil
|
103
96
|
end
|
104
97
|
|
105
|
-
|
98
|
+
Date::DAYNAMES.map { |d| d.downcase }.each do |day|
|
106
99
|
emits :"#{day}_open" do
|
107
|
-
|
100
|
+
open_close_times[day.downcase][0]
|
108
101
|
end
|
109
102
|
|
110
103
|
emits :"#{day}_close" do
|
111
|
-
|
104
|
+
open_close_times[day.downcase][1]
|
112
105
|
end
|
113
106
|
end
|
114
107
|
|
115
|
-
|
116
|
-
emits(field) {
|
108
|
+
FEATURE_FIELDS.keys.each do |field|
|
109
|
+
emits(field) { features[field] }
|
117
110
|
end
|
118
111
|
|
119
|
-
def
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
end
|
124
|
-
|
125
|
-
def details
|
126
|
-
@details ||= begin
|
127
|
-
DETAIL_FIELDS.reduce({}) do |hsh, (field, term)|
|
128
|
-
row = detail_rows.detect { |row| row.include?(term) }
|
129
|
-
value = row.include?('checked')
|
130
|
-
hsh.merge(field => value)
|
131
|
-
end
|
132
|
-
end
|
112
|
+
def get_info_node_offset(index)
|
113
|
+
pos = (info_nodes_count == 9 ? index : index + 1)
|
114
|
+
pos += (info_nodes_count == 9 ? 1 : -1) unless has_fax?
|
115
|
+
pos
|
133
116
|
end
|
134
117
|
|
135
|
-
def
|
136
|
-
@
|
137
|
-
|
118
|
+
def feature_cells
|
119
|
+
@feature_cells ||= begin
|
120
|
+
doc.css('input[type="checkbox"]').map { |el| el.parent.inner_html }
|
138
121
|
end
|
139
122
|
end
|
140
123
|
|
141
|
-
def
|
142
|
-
@
|
143
|
-
|
124
|
+
def features
|
125
|
+
@details ||= begin
|
126
|
+
Hash[FEATURE_FIELDS.map { |field, term|
|
127
|
+
cell = feature_cells.detect { |cell| cell.include?(term) }
|
128
|
+
value = cell.include?('checked')
|
129
|
+
[field, value]
|
130
|
+
}]
|
144
131
|
end
|
145
132
|
end
|
146
133
|
|
147
134
|
def has_fax?
|
148
|
-
info_nodes.to_s.include?('Fax:
|
149
|
-
end
|
150
|
-
|
151
|
-
def time_open_close(day)
|
152
|
-
open_close_times[day.to_s.downcase]
|
135
|
+
info_nodes.to_s.include?('Fax:')
|
153
136
|
end
|
154
137
|
|
155
138
|
def open_close_times
|
156
139
|
@open_close_times ||= begin
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
140
|
+
days = Date::DAYNAMES.map { |d| d.downcase }
|
141
|
+
Hash[days.each_with_index.map { |day, idx|
|
142
|
+
text = doc.css("#row#{idx}Time.hours")[0].content
|
143
|
+
next [day, [nil, nil]] if text.include?('Closed')
|
144
|
+
times = text.split('-')
|
161
145
|
open, close = *times.map { |time|
|
162
|
-
|
146
|
+
ord = time.include?('PM') ? 12 : 0
|
147
|
+
hour, min = *time.sub(/AM|PM/, '').strip.split(':').map { |t| t.to_i }
|
148
|
+
hour += ord
|
163
149
|
(hour * 60) + min
|
164
150
|
}
|
165
|
-
|
166
|
-
|
151
|
+
[day, (open == close ? [nil, nil] : [open, close])]
|
152
|
+
}]
|
167
153
|
end
|
168
154
|
end
|
169
155
|
|
170
|
-
def
|
171
|
-
|
172
|
-
end
|
173
|
-
|
174
|
-
def hours_table
|
175
|
-
container_table.css('table[width="100%"]')
|
156
|
+
def info_nodes_count
|
157
|
+
info_nodes.size
|
176
158
|
end
|
177
159
|
|
178
160
|
def info_nodes
|
179
|
-
|
180
|
-
end
|
181
|
-
|
182
|
-
def time_cells
|
183
|
-
hours_table.
|
184
|
-
css('td[width="50%"] tr').
|
185
|
-
select { |td| td.to_s =~ /[MTWTFS]{1}[onuesdhriat]{2,5}day/ }
|
186
|
-
end
|
187
|
-
|
188
|
-
def expected_node_count
|
189
|
-
has_fax? ? 8 : 7
|
161
|
+
doc.css('#storeDetails td.main_font')
|
190
162
|
end
|
191
163
|
|
192
164
|
def verify_store_returned
|
@@ -200,12 +172,5 @@ module LCBO
|
|
200
172
|
"unable to locate telephone number for store #{id}"
|
201
173
|
end
|
202
174
|
|
203
|
-
def verify_node_count
|
204
|
-
return if expected_node_count == info_nodes.size
|
205
|
-
raise CrawlKit::MalformedError,
|
206
|
-
"Expected #{expected_node_count} nodes for store #{id} but found " \
|
207
|
-
"#{info_nodes.size} instead."
|
208
|
-
end
|
209
|
-
|
210
175
|
end
|
211
176
|
end
|
data/lib/lcbo/version.rb
CHANGED