lcbo 0.10.0 → 0.10.1

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ Version 0.10.1
2
+
3
+ * Added `#tags` attribute to `ProductPage` and `StorePage` to provide simple
4
+ stems for full-text search.
5
+
1
6
  Version 0.10.0
2
7
 
3
8
  * Moved `CrawlKit` related errors into the `CrawlKit` namespace.
data/Gemfile CHANGED
@@ -3,3 +3,4 @@ source 'http://rubygems.org'
3
3
  gem 'typhoeus'
4
4
  gem 'nokogiri'
5
5
  gem 'unicode_utils'
6
+ gem 'stringex'
data/Gemfile.lock CHANGED
@@ -2,6 +2,7 @@ GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
4
  nokogiri (1.4.4)
5
+ stringex (1.1.0)
5
6
  typhoeus (0.2.0)
6
7
  unicode_utils (1.0.0)
7
8
 
@@ -10,5 +11,6 @@ PLATFORMS
10
11
 
11
12
  DEPENDENCIES
12
13
  nokogiri
14
+ stringex
13
15
  typhoeus
14
16
  unicode_utils
data/README.md CHANGED
@@ -26,9 +26,9 @@ store list pages directly from the [LCBO](http://lcbo.com) website.
26
26
  ## Crawlers
27
27
 
28
28
  Some examples of crawlers exist
29
- [here](http://github.com/heycarsten/lcbo/blob/master/examples). You can also
30
- check out the
31
- [crawler spec](http://github.com/heycarsten/lcbo/blob/master/spec/crawlkit/crawler_spec.rb)
29
+ [here](https://github.com/heycarsten/lcbo/tree/master/examples/crawlers).
30
+ You can also check out the
31
+ [crawler spec](https://github.com/heycarsten/lcbo/blob/master/spec/crawlkit/crawler_spec.rb)
32
32
  to see how to interact with them.
33
33
 
34
34
  ## Installation
@@ -1,6 +1,6 @@
1
1
  class ProductListsCrawler
2
2
 
3
- include CrawlKit::Crawler
3
+ include LCBO::CrawlKit::Crawler
4
4
 
5
5
  def request(params)
6
6
  LCBO.product_list(params[:next_page] || 1)
@@ -11,7 +11,7 @@ class ProductListsCrawler
11
11
  end
12
12
 
13
13
  def reduce
14
- requests.map { |params| params[:product_nos] }.flatten
14
+ responses.map { |params| params[:product_nos] }.flatten
15
15
  end
16
16
 
17
17
  end
data/lcbo.gemspec CHANGED
@@ -17,6 +17,7 @@ Gem::Specification.new do |s|
17
17
  s.add_dependency 'typhoeus'
18
18
  s.add_dependency 'nokogiri'
19
19
  s.add_dependency 'unicode_utils'
20
+ s.add_dependency 'stringex'
20
21
 
21
22
  s.files = `git ls-files`.split(?\n)
22
23
  s.test_files = `git ls-files -- {test,spec}/*`.split(?\n)
data/lib/lcbo/crawlkit.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'nokogiri'
2
2
  require 'typhoeus'
3
3
  require 'unicode_utils'
4
+ require 'stringex'
4
5
  require 'uri'
5
6
 
6
7
  module LCBO
@@ -23,6 +24,7 @@ require 'lcbo/crawlkit/fastdate_helper'
23
24
  require 'lcbo/crawlkit/page'
24
25
  require 'lcbo/crawlkit/request'
25
26
  require 'lcbo/crawlkit/response'
27
+ require 'lcbo/crawlkit/tag_helper'
26
28
  require 'lcbo/crawlkit/request_prototype'
27
29
  require 'lcbo/crawlkit/crawler'
28
30
  require 'lcbo/crawlkit/titlecase_helper'
@@ -0,0 +1,41 @@
1
+ module LCBO
2
+ module CrawlKit
3
+ module TagHelper
4
+ DELETION_RE = /\'|\"|\\|\/|\(|\)|\[|\]|\./
5
+ WHITESPACE_RE = /\*|\+|\&|\_|\,|\s/
6
+
7
+ def self.flatten(values)
8
+ TitleCaseHelper.downcase(values.flatten.join(' ')).
9
+ gsub(DELETION_RE, '').
10
+ gsub(WHITESPACE_RE, ' ').
11
+ strip
12
+ end
13
+
14
+ def self.split(str)
15
+ [str, str.to_ascii].
16
+ join(' ').
17
+ split.
18
+ map { |word| stem(word) }.
19
+ flatten.
20
+ uniq
21
+ end
22
+
23
+ def self.stem(word)
24
+ if word.include?('-')
25
+ parts = word.split('-')
26
+ a = parts.dup
27
+ a << parts.join
28
+ a
29
+ else
30
+ word
31
+ end
32
+ end
33
+
34
+ def self.[](*values)
35
+ return [] if values.any? { |val| '' == val.to_s.strip }
36
+ split(flatten(values))
37
+ end
38
+
39
+ end
40
+ end
41
+ end
@@ -19,6 +19,17 @@ module LCBO
19
19
  CrawlKit::TitleCaseHelper[product_details_form('itemName')]
20
20
  end
21
21
 
22
+ emits :tags do
23
+ CrawlKit::TagHelper[
24
+ name,
25
+ primary_category,
26
+ secondary_category,
27
+ origin,
28
+ producer_name,
29
+ package_unit_type
30
+ ]
31
+ end
32
+
22
33
  emits :price_in_cents do
23
34
  (product_details_form('price').to_f * 100).to_i
24
35
  end
@@ -36,20 +36,20 @@ module LCBO
36
36
  query_params[:store_no].to_i
37
37
  end
38
38
 
39
- DAY_NAMES.each do |day|
40
- emits :"#{day}_open" do
41
- time_open_close(day)[0]
42
- end
43
-
44
- emits :"#{day}_close" do
45
- time_open_close(day)[1]
46
- end
47
- end
48
-
49
39
  emits :name do
50
40
  CrawlKit::TitleCaseHelper[info_nodes[1].content.strip]
51
41
  end
52
42
 
43
+ emits :tags do
44
+ CrawlKit::TagHelper[
45
+ name,
46
+ address_line_1,
47
+ address_line_2,
48
+ city,
49
+ postal_code
50
+ ]
51
+ end
52
+
53
53
  emits :address_line_1 do
54
54
  data = info_nodes[2].content.strip.split(',')[0]
55
55
  unless data
@@ -99,6 +99,16 @@ module LCBO
99
99
  location['longitude'][0].to_f
100
100
  end
101
101
 
102
+ DAY_NAMES.each do |day|
103
+ emits :"#{day}_open" do
104
+ time_open_close(day)[0]
105
+ end
106
+
107
+ emits :"#{day}_close" do
108
+ time_open_close(day)[1]
109
+ end
110
+ end
111
+
102
112
  DETAIL_FIELDS.keys.each do |field|
103
113
  emits(field) { details[field] }
104
114
  end
data/lib/lcbo/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module LCBO
2
- VERSION = '0.10.0'
2
+ VERSION = '0.10.1'
3
3
  end
@@ -0,0 +1,18 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe LCBO::CrawlKit::TagHelper do
5
+ @expectations = {
6
+ ['Hello World'] => %w[hello world],
7
+ ['Éve Picard'] => %w[éve picard eve],
8
+ ['Hello Hello World'] => %w[hello world],
9
+ ['Hello', 'Éve Picard'] => %w[hello éve picard eve],
10
+ ['Hello', 'Éve-Picard'] => %w[hello éve picard évepicard eve evepicard]
11
+ }
12
+
13
+ @expectations.each_pair do |input, expectation|
14
+ it "should tagify: #{input.inspect} to: #{expectation.inspect}" do
15
+ LCBO::CrawlKit::TagHelper[*input].must_equal expectation
16
+ end
17
+ end
18
+ end
@@ -7,6 +7,7 @@
7
7
  :expectation:
8
8
  :product_no: 122408
9
9
  :name: Pascual Toso Malbec Rose
10
+ :tags: []
10
11
  :price_in_cents: 1195
11
12
  :regular_price_in_cents: 1195
12
13
  :limited_time_offer_savings_in_cents: 0
@@ -47,6 +48,7 @@
47
48
  :expectation:
48
49
  :product_no: 18
49
50
  :name: Heineken Lager
51
+ :tags: ["heineken", "lager", "beer", "netherlands", "heinekens", "brouwerijen", "nederland", "bv", "bottle"]
50
52
  :price_in_cents: 1250
51
53
  :regular_price_in_cents: 1350
52
54
  :limited_time_offer_savings_in_cents: 100
@@ -87,6 +89,7 @@
87
89
  :expectation:
88
90
  :product_no: 280461
89
91
  :name: Dom Pérignon Brut Champagne
92
+ :tags: ["dom", "pérignon", "brut", "champagne", "wine", "france", "moet", "chandon", "sa", "bottle", "perignon"]
90
93
  :price_in_cents: 21995
91
94
  :regular_price_in_cents: 21995
92
95
  :limited_time_offer_savings_in_cents: 0
@@ -127,6 +130,7 @@
127
130
  :expectation:
128
131
  :product_no: 479154
129
132
  :name: Floris Ninkeberry Gardenbeer
133
+ :tags: ["floris", "ninkeberry", "gardenbeer", "beer", "ale", "belgium", "brouwerij", "huyghe", "bottle"]
130
134
  :price_in_cents: 250
131
135
  :regular_price_in_cents: 250
132
136
  :limited_time_offer_savings_in_cents: 0
@@ -167,6 +171,7 @@
167
171
  :expectation:
168
172
  :product_no: 148528
169
173
  :name: Bodegas Ruconia Rey Don Garcia El de Najera Crianza 2005
174
+ :tags: ["bodegas", "ruconia", "rey", "don", "garcia", "el", "de", "najera", "crianza", "2005", "wine", "red", "rioja", "spain", "sl", "bottle"]
170
175
  :price_in_cents: 1595
171
176
  :regular_price_in_cents: 1595
172
177
  :limited_time_offer_savings_in_cents: 0
@@ -207,6 +212,7 @@
207
212
  :expectation:
208
213
  :product_no: 582973
209
214
  :name: Bombay Sapphire
215
+ :tags: ["bombay", "sapphire", "spirits", "gin", "england", "united", "kingdom", "company", "ltd", "bottle"]
210
216
  :price_in_cents: 5545
211
217
  :regular_price_in_cents: 5545
212
218
  :limited_time_offer_savings_in_cents: 0
@@ -247,6 +253,7 @@
247
253
  :expectation:
248
254
  :product_no: 2183079
249
255
  :name: Lake & River Series Cabernet Franc VQA
256
+ :tags: []
250
257
  :price_in_cents: 1095
251
258
  :regular_price_in_cents: 1095
252
259
  :limited_time_offer_savings_in_cents: 0
@@ -287,6 +294,7 @@
287
294
  :expectation:
288
295
  :product_no: 114173
289
296
  :name: 05 Sauv Blanc Weinstock Cellar K
297
+ :tags: []
290
298
  :price_in_cents: 2295
291
299
  :regular_price_in_cents: 2295
292
300
  :limited_time_offer_savings_in_cents: 0
@@ -7,6 +7,7 @@
7
7
  :expectation:
8
8
  :store_no: 444
9
9
  :name: Kennedy & Sheppard
10
+ :tags: ["kennedy", "sheppard", "2356", "road", "agincourt", "mall", "toronto", "scarborough", "torontoscarborough", "m1t3h1"]
10
11
  :address_line_1: 2356 Kennedy Road
11
12
  :address_line_2: Agincourt Mall
12
13
  :city: Toronto-Scarborough
@@ -61,6 +62,7 @@
61
62
  :sunday_open: 720
62
63
  :sunday_close: 960
63
64
  :name: Flora & Clinton Street
65
+ :tags: ["flora", "clinton", "street", "1", "industrial", "road", "unit", "b", "teeswater", "n0g2s0"]
64
66
  :address_line_1: 1 Industrial Road
65
67
  :address_line_2: Unit B
66
68
  :city: Teeswater
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 10
8
- - 0
9
- version: 0.10.0
8
+ - 1
9
+ version: 0.10.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - Carsten Nielsen
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-12-09 00:00:00 -05:00
17
+ date: 2010-12-30 00:00:00 -05:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -56,6 +56,19 @@ dependencies:
56
56
  version: "0"
57
57
  type: :runtime
58
58
  version_requirements: *id003
59
+ - !ruby/object:Gem::Dependency
60
+ name: stringex
61
+ prerelease: false
62
+ requirement: &id004 !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ type: :runtime
71
+ version_requirements: *id004
59
72
  description: Request and parse product, store, inventory, and product search pages directly from the official LCBO website.
60
73
  email:
61
74
  - heycarsten@gmail.com
@@ -89,6 +102,7 @@ files:
89
102
  - lib/lcbo/crawlkit/request.rb
90
103
  - lib/lcbo/crawlkit/request_prototype.rb
91
104
  - lib/lcbo/crawlkit/response.rb
105
+ - lib/lcbo/crawlkit/tag_helper.rb
92
106
  - lib/lcbo/crawlkit/titlecase_helper.rb
93
107
  - lib/lcbo/crawlkit/volume_helper.rb
94
108
  - lib/lcbo/ext.rb
@@ -107,6 +121,7 @@ files:
107
121
  - spec/crawlkit/request_prototype_spec.rb
108
122
  - spec/crawlkit/request_spec.rb
109
123
  - spec/crawlkit/response_spec.rb
124
+ - spec/crawlkit/tag_helper_spec.rb
110
125
  - spec/crawlkit/titlecase_helper_spec.rb
111
126
  - spec/crawlkit/volume_helper_spec.rb
112
127
  - spec/crawlkit_spec.rb
@@ -174,6 +189,7 @@ test_files:
174
189
  - spec/crawlkit/request_prototype_spec.rb
175
190
  - spec/crawlkit/request_spec.rb
176
191
  - spec/crawlkit/response_spec.rb
192
+ - spec/crawlkit/tag_helper_spec.rb
177
193
  - spec/crawlkit/titlecase_helper_spec.rb
178
194
  - spec/crawlkit/volume_helper_spec.rb
179
195
  - spec/crawlkit_spec.rb