lcbo 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +4 -0
- data/lib/lcbo/pages/store_page.rb +53 -88
- data/lib/lcbo/version.rb +1 -1
- data/spec/pages/store_pages/1.html +433 -190
- data/spec/pages/store_pages/2.html +431 -189
- data/spec/pages/store_pages.yml +59 -34
- metadata +10 -10
data/CHANGELOG.md
CHANGED
@@ -7,28 +7,18 @@ module LCBO
|
|
7
7
|
|
8
8
|
uri 'http://www.lcbo.com/lcbo-ear/jsp/storeinfo.jsp?STORE={id}&language=EN'
|
9
9
|
|
10
|
-
|
11
|
-
monday
|
12
|
-
tuesday
|
13
|
-
wednesday
|
14
|
-
thursday
|
15
|
-
friday
|
16
|
-
saturday
|
17
|
-
sunday ]
|
18
|
-
|
19
|
-
DETAIL_FIELDS = {
|
10
|
+
FEATURE_FIELDS = {
|
20
11
|
:has_wheelchair_accessability => 'wheelchair',
|
21
12
|
:has_bilingual_services => 'bilingual',
|
22
13
|
:has_product_consultant => 'consultant',
|
23
14
|
:has_tasting_bar => 'tasting',
|
24
15
|
:has_beer_cold_room => 'cold',
|
25
16
|
:has_special_occasion_permits => 'permits',
|
26
|
-
:has_vintages_corner => '
|
17
|
+
:has_vintages_corner => 'corner',
|
27
18
|
:has_parking => 'parking',
|
28
19
|
:has_transit_access => 'transit' }
|
29
20
|
|
30
21
|
on :before_parse, :verify_store_returned
|
31
|
-
on :after_parse, :verify_node_count
|
32
22
|
on :after_parse, :verify_telephone_number
|
33
23
|
|
34
24
|
emits :id do
|
@@ -36,7 +26,7 @@ module LCBO
|
|
36
26
|
end
|
37
27
|
|
38
28
|
emits :name do
|
39
|
-
CrawlKit::TitleCaseHelper[
|
29
|
+
CrawlKit::TitleCaseHelper[doc.css('.infoWindowTitle')[0].content.strip]
|
40
30
|
end
|
41
31
|
|
42
32
|
emits :tags do
|
@@ -50,7 +40,7 @@ module LCBO
|
|
50
40
|
end
|
51
41
|
|
52
42
|
emits :address_line_1 do
|
53
|
-
data = info_nodes[2].content.strip
|
43
|
+
data = info_nodes[2].content.strip
|
54
44
|
unless data
|
55
45
|
raise CrawlKit::MalformedError,
|
56
46
|
"unable to locate address for store #{idid}"
|
@@ -59,134 +49,116 @@ module LCBO
|
|
59
49
|
end
|
60
50
|
|
61
51
|
emits :address_line_2 do
|
62
|
-
data = info_nodes[
|
63
|
-
CrawlKit::TitleCaseHelper[data
|
52
|
+
data = info_nodes[3].content.strip
|
53
|
+
CrawlKit::TitleCaseHelper[data] if data != ''
|
64
54
|
end
|
65
55
|
|
66
56
|
emits :city do
|
67
|
-
|
68
|
-
|
57
|
+
pos = get_info_node_offset(4)
|
58
|
+
data = info_nodes[pos].content.strip.split(',')[0]
|
59
|
+
CrawlKit::TitleCaseHelper[data.strip] if data
|
69
60
|
end
|
70
61
|
|
71
62
|
emits :postal_code do
|
72
|
-
|
63
|
+
pos = get_info_node_offset(4)
|
64
|
+
data = info_nodes[pos].content.strip.split(',')[1]
|
73
65
|
unless data
|
74
66
|
raise CrawlKit::MalformedError,
|
75
67
|
"unable to locate postal code for store #{id}"
|
76
68
|
end
|
77
|
-
data.
|
69
|
+
data.strip.upcase
|
78
70
|
end
|
79
71
|
|
80
72
|
emits :telephone do
|
73
|
+
pos = get_info_node_offset(6)
|
81
74
|
CrawlKit::PhoneHelper[
|
82
|
-
info_nodes[
|
83
|
-
gsub(/[\n\r\t]+/, ' ').
|
84
|
-
gsub('Telephone:', '').
|
85
|
-
strip
|
75
|
+
info_nodes[pos].content.sub('Telephone:', '').strip
|
86
76
|
]
|
87
77
|
end
|
88
78
|
|
89
79
|
emits :fax do
|
90
80
|
if has_fax?
|
81
|
+
pos = (info_nodes_count - 1)
|
91
82
|
CrawlKit::PhoneHelper[
|
92
|
-
info_nodes[
|
83
|
+
info_nodes[pos].content.sub('Fax:', '').strip
|
93
84
|
]
|
94
85
|
end
|
95
86
|
end
|
96
87
|
|
97
88
|
emits :latitude do
|
98
|
-
|
89
|
+
node = doc.css('#latitude').first
|
90
|
+
node ? node[:value].to_f : nil
|
99
91
|
end
|
100
92
|
|
101
93
|
emits :longitude do
|
102
|
-
|
94
|
+
node = doc.css('#longitude').first
|
95
|
+
node ? node[:value].to_f : nil
|
103
96
|
end
|
104
97
|
|
105
|
-
|
98
|
+
Date::DAYNAMES.map { |d| d.downcase }.each do |day|
|
106
99
|
emits :"#{day}_open" do
|
107
|
-
|
100
|
+
open_close_times[day.downcase][0]
|
108
101
|
end
|
109
102
|
|
110
103
|
emits :"#{day}_close" do
|
111
|
-
|
104
|
+
open_close_times[day.downcase][1]
|
112
105
|
end
|
113
106
|
end
|
114
107
|
|
115
|
-
|
116
|
-
emits(field) {
|
108
|
+
FEATURE_FIELDS.keys.each do |field|
|
109
|
+
emits(field) { features[field] }
|
117
110
|
end
|
118
111
|
|
119
|
-
def
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
end
|
124
|
-
|
125
|
-
def details
|
126
|
-
@details ||= begin
|
127
|
-
DETAIL_FIELDS.reduce({}) do |hsh, (field, term)|
|
128
|
-
row = detail_rows.detect { |row| row.include?(term) }
|
129
|
-
value = row.include?('checked')
|
130
|
-
hsh.merge(field => value)
|
131
|
-
end
|
132
|
-
end
|
112
|
+
def get_info_node_offset(index)
|
113
|
+
pos = (info_nodes_count == 9 ? index : index + 1)
|
114
|
+
pos += (info_nodes_count == 9 ? 1 : -1) unless has_fax?
|
115
|
+
pos
|
133
116
|
end
|
134
117
|
|
135
|
-
def
|
136
|
-
@
|
137
|
-
|
118
|
+
def feature_cells
|
119
|
+
@feature_cells ||= begin
|
120
|
+
doc.css('input[type="checkbox"]').map { |el| el.parent.inner_html }
|
138
121
|
end
|
139
122
|
end
|
140
123
|
|
141
|
-
def
|
142
|
-
@
|
143
|
-
|
124
|
+
def features
|
125
|
+
@details ||= begin
|
126
|
+
Hash[FEATURE_FIELDS.map { |field, term|
|
127
|
+
cell = feature_cells.detect { |cell| cell.include?(term) }
|
128
|
+
value = cell.include?('checked')
|
129
|
+
[field, value]
|
130
|
+
}]
|
144
131
|
end
|
145
132
|
end
|
146
133
|
|
147
134
|
def has_fax?
|
148
|
-
info_nodes.to_s.include?('Fax:
|
149
|
-
end
|
150
|
-
|
151
|
-
def time_open_close(day)
|
152
|
-
open_close_times[day.to_s.downcase]
|
135
|
+
info_nodes.to_s.include?('Fax:')
|
153
136
|
end
|
154
137
|
|
155
138
|
def open_close_times
|
156
139
|
@open_close_times ||= begin
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
140
|
+
days = Date::DAYNAMES.map { |d| d.downcase }
|
141
|
+
Hash[days.each_with_index.map { |day, idx|
|
142
|
+
text = doc.css("#row#{idx}Time.hours")[0].content
|
143
|
+
next [day, [nil, nil]] if text.include?('Closed')
|
144
|
+
times = text.split('-')
|
161
145
|
open, close = *times.map { |time|
|
162
|
-
|
146
|
+
ord = time.include?('PM') ? 12 : 0
|
147
|
+
hour, min = *time.sub(/AM|PM/, '').strip.split(':').map { |t| t.to_i }
|
148
|
+
hour += ord
|
163
149
|
(hour * 60) + min
|
164
150
|
}
|
165
|
-
|
166
|
-
|
151
|
+
[day, (open == close ? [nil, nil] : [open, close])]
|
152
|
+
}]
|
167
153
|
end
|
168
154
|
end
|
169
155
|
|
170
|
-
def
|
171
|
-
|
172
|
-
end
|
173
|
-
|
174
|
-
def hours_table
|
175
|
-
container_table.css('table[width="100%"]')
|
156
|
+
def info_nodes_count
|
157
|
+
info_nodes.size
|
176
158
|
end
|
177
159
|
|
178
160
|
def info_nodes
|
179
|
-
|
180
|
-
end
|
181
|
-
|
182
|
-
def time_cells
|
183
|
-
hours_table.
|
184
|
-
css('td[width="50%"] tr').
|
185
|
-
select { |td| td.to_s =~ /[MTWTFS]{1}[onuesdhriat]{2,5}day/ }
|
186
|
-
end
|
187
|
-
|
188
|
-
def expected_node_count
|
189
|
-
has_fax? ? 8 : 7
|
161
|
+
doc.css('#storeDetails td.main_font')
|
190
162
|
end
|
191
163
|
|
192
164
|
def verify_store_returned
|
@@ -200,12 +172,5 @@ module LCBO
|
|
200
172
|
"unable to locate telephone number for store #{id}"
|
201
173
|
end
|
202
174
|
|
203
|
-
def verify_node_count
|
204
|
-
return if expected_node_count == info_nodes.size
|
205
|
-
raise CrawlKit::MalformedError,
|
206
|
-
"Expected #{expected_node_count} nodes for store #{id} but found " \
|
207
|
-
"#{info_nodes.size} instead."
|
208
|
-
end
|
209
|
-
|
210
175
|
end
|
211
176
|
end
|
data/lib/lcbo/version.rb
CHANGED