lcbo 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ Version 0.10.1
2
+
3
+ * Added `#tags` attribute to `ProductPage` and `StorePage` to provide simple
4
+ stems for full-text search.
5
+
1
6
  Version 0.10.0
2
7
 
3
8
  * Moved `CrawlKit` related errors into the `CrawlKit` namespace.
data/Gemfile CHANGED
@@ -3,3 +3,4 @@ source 'http://rubygems.org'
3
3
  gem 'typhoeus'
4
4
  gem 'nokogiri'
5
5
  gem 'unicode_utils'
6
+ gem 'stringex'
data/Gemfile.lock CHANGED
@@ -2,6 +2,7 @@ GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
4
  nokogiri (1.4.4)
5
+ stringex (1.1.0)
5
6
  typhoeus (0.2.0)
6
7
  unicode_utils (1.0.0)
7
8
 
@@ -10,5 +11,6 @@ PLATFORMS
10
11
 
11
12
  DEPENDENCIES
12
13
  nokogiri
14
+ stringex
13
15
  typhoeus
14
16
  unicode_utils
data/README.md CHANGED
@@ -26,9 +26,9 @@ store list pages directly from the [LCBO](http://lcbo.com) website.
26
26
  ## Crawlers
27
27
 
28
28
  Some examples of crawlers exist
29
- [here](http://github.com/heycarsten/lcbo/blob/master/examples). You can also
30
- check out the
31
- [crawler spec](http://github.com/heycarsten/lcbo/blob/master/spec/crawlkit/crawler_spec.rb)
29
+ [here](https://github.com/heycarsten/lcbo/tree/master/examples/crawlers).
30
+ You can also check out the
31
+ [crawler spec](https://github.com/heycarsten/lcbo/blob/master/spec/crawlkit/crawler_spec.rb)
32
32
  to see how to interact with them.
33
33
 
34
34
  ## Installation
@@ -1,6 +1,6 @@
1
1
  class ProductListsCrawler
2
2
 
3
- include CrawlKit::Crawler
3
+ include LCBO::CrawlKit::Crawler
4
4
 
5
5
  def request(params)
6
6
  LCBO.product_list(params[:next_page] || 1)
@@ -11,7 +11,7 @@ class ProductListsCrawler
11
11
  end
12
12
 
13
13
  def reduce
14
- requests.map { |params| params[:product_nos] }.flatten
14
+ responses.map { |params| params[:product_nos] }.flatten
15
15
  end
16
16
 
17
17
  end
data/lcbo.gemspec CHANGED
@@ -17,6 +17,7 @@ Gem::Specification.new do |s|
17
17
  s.add_dependency 'typhoeus'
18
18
  s.add_dependency 'nokogiri'
19
19
  s.add_dependency 'unicode_utils'
20
+ s.add_dependency 'stringex'
20
21
 
21
22
  s.files = `git ls-files`.split(?\n)
22
23
  s.test_files = `git ls-files -- {test,spec}/*`.split(?\n)
data/lib/lcbo/crawlkit.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'nokogiri'
2
2
  require 'typhoeus'
3
3
  require 'unicode_utils'
4
+ require 'stringex'
4
5
  require 'uri'
5
6
 
6
7
  module LCBO
@@ -23,6 +24,7 @@ require 'lcbo/crawlkit/fastdate_helper'
23
24
  require 'lcbo/crawlkit/page'
24
25
  require 'lcbo/crawlkit/request'
25
26
  require 'lcbo/crawlkit/response'
27
+ require 'lcbo/crawlkit/tag_helper'
26
28
  require 'lcbo/crawlkit/request_prototype'
27
29
  require 'lcbo/crawlkit/crawler'
28
30
  require 'lcbo/crawlkit/titlecase_helper'
@@ -0,0 +1,41 @@
1
module LCBO
  module CrawlKit
    # Builds a flat, de-duplicated list of lowercase search tags from one or
    # more attribute values (names, categories, addresses, ...).
    #
    # Usage: TagHelper['Hello', 'Éve-Picard']
    module TagHelper
      # Characters removed outright: quotes, slashes, brackets and periods.
      DELETION_RE = /\'|\"|\\|\/|\(|\)|\[|\]|\./
      # Characters treated as word separators (each replaced by one space).
      WHITESPACE_RE = /\*|\+|\&|\_|\,|\s/

      # Joins all values into a single string, lowercases it via
      # TitleCaseHelper.downcase (defined elsewhere in CrawlKit), removes
      # DELETION_RE characters and turns WHITESPACE_RE characters into spaces.
      #
      # values - Array (possibly nested) of values to combine.
      #
      # Returns the normalized String with surrounding whitespace stripped.
      def self.flatten(values)
        TitleCaseHelper.downcase(values.flatten.join(' ')).
          gsub(DELETION_RE, '').
          gsub(WHITESPACE_RE, ' ').
          strip
      end

      # Splits a normalized string into unique tags. The string is processed
      # together with its String#to_ascii form (presumably supplied by the
      # stringex gem) so that both the accented and the plain spelling of a
      # word become tags.
      #
      # str - normalized String, as produced by .flatten.
      #
      # Returns an Array of unique tag Strings in first-seen order.
      def self.split(str)
        [str, str.to_ascii].
          join(' ').
          split.
          map { |word| stem(word) }.
          flatten.
          uniq
      end

      # Expands a hyphenated word into its segments plus the segments joined
      # together, e.g. "a-b" => ["a", "b", "ab"].
      #
      # Empty segments (from a stand-alone, leading, trailing or doubled
      # hyphen) are discarded so that no empty-string tag is ever produced.
      #
      # word - a single whitespace-free String token.
      #
      # Returns the word itself when it contains no hyphen, otherwise an
      # Array of segment Strings (empty when the word is only hyphens).
      def self.stem(word)
        return word unless word.include?('-')

        parts = word.split('-').reject(&:empty?)
        # With fewer than two real segments the joined form adds nothing new.
        return parts if parts.size < 2

        parts + [parts.join]
      end

      # Entry point: converts the given values into a list of tags.
      #
      # values - any number of values; each is checked via #to_s.
      #
      # Returns an empty Array when ANY value is blank (nil, empty or
      # whitespace only), otherwise the Array of tags.
      def self.[](*values)
        return [] if values.any? { |val| '' == val.to_s.strip }
        split(flatten(values))
      end

    end
  end
end
@@ -19,6 +19,17 @@ module LCBO
19
19
  CrawlKit::TitleCaseHelper[product_details_form('itemName')]
20
20
  end
21
21
 
22
+ emits :tags do
23
+ CrawlKit::TagHelper[
24
+ name,
25
+ primary_category,
26
+ secondary_category,
27
+ origin,
28
+ producer_name,
29
+ package_unit_type
30
+ ]
31
+ end
32
+
22
33
  emits :price_in_cents do
23
34
  (product_details_form('price').to_f * 100).to_i
24
35
  end
@@ -36,20 +36,20 @@ module LCBO
36
36
  query_params[:store_no].to_i
37
37
  end
38
38
 
39
- DAY_NAMES.each do |day|
40
- emits :"#{day}_open" do
41
- time_open_close(day)[0]
42
- end
43
-
44
- emits :"#{day}_close" do
45
- time_open_close(day)[1]
46
- end
47
- end
48
-
49
39
  emits :name do
50
40
  CrawlKit::TitleCaseHelper[info_nodes[1].content.strip]
51
41
  end
52
42
 
43
+ emits :tags do
44
+ CrawlKit::TagHelper[
45
+ name,
46
+ address_line_1,
47
+ address_line_2,
48
+ city,
49
+ postal_code
50
+ ]
51
+ end
52
+
53
53
  emits :address_line_1 do
54
54
  data = info_nodes[2].content.strip.split(',')[0]
55
55
  unless data
@@ -99,6 +99,16 @@ module LCBO
99
99
  location['longitude'][0].to_f
100
100
  end
101
101
 
102
+ DAY_NAMES.each do |day|
103
+ emits :"#{day}_open" do
104
+ time_open_close(day)[0]
105
+ end
106
+
107
+ emits :"#{day}_close" do
108
+ time_open_close(day)[1]
109
+ end
110
+ end
111
+
102
112
  DETAIL_FIELDS.keys.each do |field|
103
113
  emits(field) { details[field] }
104
114
  end
data/lib/lcbo/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module LCBO
2
- VERSION = '0.10.0'
2
+ VERSION = '0.10.1'
3
3
  end
@@ -0,0 +1,18 @@
1
# coding: utf-8
require 'spec_helper'

# Table-driven examples for LCBO::CrawlKit::TagHelper: each pair holds the
# argument list passed to TagHelper[] and the exact tag array expected back.
describe LCBO::CrawlKit::TagHelper do
  examples = [
    [['Hello World'],          %w[hello world]],
    [['Éve Picard'],           %w[éve picard eve]],
    [['Hello Hello World'],    %w[hello world]],
    [['Hello', 'Éve Picard'],  %w[hello éve picard eve]],
    [['Hello', 'Éve-Picard'],  %w[hello éve picard évepicard eve evepicard]]
  ]

  examples.each do |input, expectation|
    it "should tagify: #{input.inspect} to: #{expectation.inspect}" do
      LCBO::CrawlKit::TagHelper[*input].must_equal expectation
    end
  end
end
@@ -7,6 +7,7 @@
7
7
  :expectation:
8
8
  :product_no: 122408
9
9
  :name: Pascual Toso Malbec Rose
10
+ :tags: []
10
11
  :price_in_cents: 1195
11
12
  :regular_price_in_cents: 1195
12
13
  :limited_time_offer_savings_in_cents: 0
@@ -47,6 +48,7 @@
47
48
  :expectation:
48
49
  :product_no: 18
49
50
  :name: Heineken Lager
51
+ :tags: ["heineken", "lager", "beer", "netherlands", "heinekens", "brouwerijen", "nederland", "bv", "bottle"]
50
52
  :price_in_cents: 1250
51
53
  :regular_price_in_cents: 1350
52
54
  :limited_time_offer_savings_in_cents: 100
@@ -87,6 +89,7 @@
87
89
  :expectation:
88
90
  :product_no: 280461
89
91
  :name: Dom Pérignon Brut Champagne
92
+ :tags: ["dom", "pérignon", "brut", "champagne", "wine", "france", "moet", "chandon", "sa", "bottle", "perignon"]
90
93
  :price_in_cents: 21995
91
94
  :regular_price_in_cents: 21995
92
95
  :limited_time_offer_savings_in_cents: 0
@@ -127,6 +130,7 @@
127
130
  :expectation:
128
131
  :product_no: 479154
129
132
  :name: Floris Ninkeberry Gardenbeer
133
+ :tags: ["floris", "ninkeberry", "gardenbeer", "beer", "ale", "belgium", "brouwerij", "huyghe", "bottle"]
130
134
  :price_in_cents: 250
131
135
  :regular_price_in_cents: 250
132
136
  :limited_time_offer_savings_in_cents: 0
@@ -167,6 +171,7 @@
167
171
  :expectation:
168
172
  :product_no: 148528
169
173
  :name: Bodegas Ruconia Rey Don Garcia El de Najera Crianza 2005
174
+ :tags: ["bodegas", "ruconia", "rey", "don", "garcia", "el", "de", "najera", "crianza", "2005", "wine", "red", "rioja", "spain", "sl", "bottle"]
170
175
  :price_in_cents: 1595
171
176
  :regular_price_in_cents: 1595
172
177
  :limited_time_offer_savings_in_cents: 0
@@ -207,6 +212,7 @@
207
212
  :expectation:
208
213
  :product_no: 582973
209
214
  :name: Bombay Sapphire
215
+ :tags: ["bombay", "sapphire", "spirits", "gin", "england", "united", "kingdom", "company", "ltd", "bottle"]
210
216
  :price_in_cents: 5545
211
217
  :regular_price_in_cents: 5545
212
218
  :limited_time_offer_savings_in_cents: 0
@@ -247,6 +253,7 @@
247
253
  :expectation:
248
254
  :product_no: 2183079
249
255
  :name: Lake & River Series Cabernet Franc VQA
256
+ :tags: []
250
257
  :price_in_cents: 1095
251
258
  :regular_price_in_cents: 1095
252
259
  :limited_time_offer_savings_in_cents: 0
@@ -287,6 +294,7 @@
287
294
  :expectation:
288
295
  :product_no: 114173
289
296
  :name: 05 Sauv Blanc Weinstock Cellar K
297
+ :tags: []
290
298
  :price_in_cents: 2295
291
299
  :regular_price_in_cents: 2295
292
300
  :limited_time_offer_savings_in_cents: 0
@@ -7,6 +7,7 @@
7
7
  :expectation:
8
8
  :store_no: 444
9
9
  :name: Kennedy & Sheppard
10
+ :tags: ["kennedy", "sheppard", "2356", "road", "agincourt", "mall", "toronto", "scarborough", "torontoscarborough", "m1t3h1"]
10
11
  :address_line_1: 2356 Kennedy Road
11
12
  :address_line_2: Agincourt Mall
12
13
  :city: Toronto-Scarborough
@@ -61,6 +62,7 @@
61
62
  :sunday_open: 720
62
63
  :sunday_close: 960
63
64
  :name: Flora & Clinton Street
65
+ :tags: ["flora", "clinton", "street", "1", "industrial", "road", "unit", "b", "teeswater", "n0g2s0"]
64
66
  :address_line_1: 1 Industrial Road
65
67
  :address_line_2: Unit B
66
68
  :city: Teeswater
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 10
8
- - 0
9
- version: 0.10.0
8
+ - 1
9
+ version: 0.10.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - Carsten Nielsen
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-12-09 00:00:00 -05:00
17
+ date: 2010-12-30 00:00:00 -05:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -56,6 +56,19 @@ dependencies:
56
56
  version: "0"
57
57
  type: :runtime
58
58
  version_requirements: *id003
59
+ - !ruby/object:Gem::Dependency
60
+ name: stringex
61
+ prerelease: false
62
+ requirement: &id004 !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ type: :runtime
71
+ version_requirements: *id004
59
72
  description: Request and parse product, store, inventory, and product search pages directly from the official LCBO website.
60
73
  email:
61
74
  - heycarsten@gmail.com
@@ -89,6 +102,7 @@ files:
89
102
  - lib/lcbo/crawlkit/request.rb
90
103
  - lib/lcbo/crawlkit/request_prototype.rb
91
104
  - lib/lcbo/crawlkit/response.rb
105
+ - lib/lcbo/crawlkit/tag_helper.rb
92
106
  - lib/lcbo/crawlkit/titlecase_helper.rb
93
107
  - lib/lcbo/crawlkit/volume_helper.rb
94
108
  - lib/lcbo/ext.rb
@@ -107,6 +121,7 @@ files:
107
121
  - spec/crawlkit/request_prototype_spec.rb
108
122
  - spec/crawlkit/request_spec.rb
109
123
  - spec/crawlkit/response_spec.rb
124
+ - spec/crawlkit/tag_helper_spec.rb
110
125
  - spec/crawlkit/titlecase_helper_spec.rb
111
126
  - spec/crawlkit/volume_helper_spec.rb
112
127
  - spec/crawlkit_spec.rb
@@ -174,6 +189,7 @@ test_files:
174
189
  - spec/crawlkit/request_prototype_spec.rb
175
190
  - spec/crawlkit/request_spec.rb
176
191
  - spec/crawlkit/response_spec.rb
192
+ - spec/crawlkit/tag_helper_spec.rb
177
193
  - spec/crawlkit/titlecase_helper_spec.rb
178
194
  - spec/crawlkit/volume_helper_spec.rb
179
195
  - spec/crawlkit_spec.rb