lcbo 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
 - data/CHANGELOG.md +3 -0
 - data/Gemfile +6 -0
 - data/Gemfile.lock +18 -0
 - data/LICENSE +18 -0
 - data/README.md +29 -0
 - data/Rakefile +62 -0
 - data/lcbo.gemspec +29 -0
 - data/lib/lcbo.rb +23 -0
 - data/lib/lcbo/crawlers.rb +4 -0
 - data/lib/lcbo/crawlers/inventories_crawler.rb +15 -0
 - data/lib/lcbo/crawlers/product_lists_crawler.rb +23 -0
 - data/lib/lcbo/crawlers/products_crawler.rb +16 -0
 - data/lib/lcbo/crawlers/stores_crawler.rb +16 -0
 - data/lib/lcbo/crawlkit.rb +24 -0
 - data/lib/lcbo/crawlkit/eventable.rb +56 -0
 - data/lib/lcbo/crawlkit/fastdate_helper.rb +40 -0
 - data/lib/lcbo/crawlkit/page.rb +141 -0
 - data/lib/lcbo/crawlkit/request.rb +51 -0
 - data/lib/lcbo/crawlkit/request_prototype.rb +31 -0
 - data/lib/lcbo/crawlkit/response.rb +48 -0
 - data/lib/lcbo/crawlkit/titlecase_helper.rb +97 -0
 - data/lib/lcbo/crawlkit/volume_helper.rb +46 -0
 - data/lib/lcbo/ext.rb +13 -0
 - data/lib/lcbo/helpers.rb +34 -0
 - data/lib/lcbo/pages.rb +4 -0
 - data/lib/lcbo/pages/inventory_page.rb +60 -0
 - data/lib/lcbo/pages/product_list_page.rb +85 -0
 - data/lib/lcbo/pages/product_page.rb +296 -0
 - data/lib/lcbo/pages/store_page.rb +196 -0
 - data/lib/lcbo/version.rb +3 -0
 - data/spec/crawlkit/eventable_spec.rb +23 -0
 - data/spec/crawlkit/fastdate_helper_spec.rb +18 -0
 - data/spec/crawlkit/page_spec.rb +114 -0
 - data/spec/crawlkit/request_prototype_spec.rb +5 -0
 - data/spec/crawlkit/request_spec.rb +41 -0
 - data/spec/crawlkit/response_spec.rb +5 -0
 - data/spec/crawlkit/titlecase_helper_spec.rb +30 -0
 - data/spec/crawlkit/volume_helper_spec.rb +21 -0
 - data/spec/crawlkit_spec.rb +5 -0
 - data/spec/lcbo_spec.rb +38 -0
 - data/spec/pages/inventory_pages.yml +1685 -0
 - data/spec/pages/inventory_pages/1.html +11649 -0
 - data/spec/pages/inventory_pages/2.html +495 -0
 - data/spec/pages/product_list_pages.yml +108 -0
 - data/spec/pages/product_list_pages/1.html +4866 -0
 - data/spec/pages/product_pages.yml +258 -0
 - data/spec/pages/product_pages/1.html +1319 -0
 - data/spec/pages/product_pages/2.html +1343 -0
 - data/spec/pages/product_pages/3.html +1336 -0
 - data/spec/pages/product_pages/4.html +1319 -0
 - data/spec/pages/product_pages/5.html +1324 -0
 - data/spec/pages/product_pages/6.html +1319 -0
 - data/spec/pages/product_pages/7.html +1314 -0
 - data/spec/pages/store_pages.yml +80 -0
 - data/spec/pages/store_pages/1.html +592 -0
 - data/spec/pages/store_pages/2.html +592 -0
 - data/spec/pages_spec.rb +34 -0
 - data/spec/spec_helper.rb +77 -0
 - metadata +205 -0
 
| 
         @@ -0,0 +1,296 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module LCBO
         
     | 
| 
      
 2 
     | 
    
         
            +
              class ProductPage
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
                include CrawlKit::Page
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
                # URI template for the LCBO product details page.
                uri 'http://lcbo.com/lcbo-ear/lcbo/product/details.do?language=EN&itemNumber={product_no}'

                # Handler methods registered for the :before_parse and :after_parse
                # events (the `on` macro is presumably supplied by CrawlKit::Page).
                on :before_parse, :verify_response_not_blank
                on :after_parse,  :verify_product_details_form
                on :after_parse,  :verify_product_name
                on :after_parse,  :verify_third_info_cell
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
                # Product number from the :product_no query parameter, as an Integer.
                emits :product_no do
                  number = query_params[:product_no]
                  number.to_i
                end
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
                # Product name: the `itemName` form field run through TitleCaseHelper.
                emits :name do
                  raw_name = product_details_form('itemName')
                  CrawlKit::TitleCaseHelper[raw_name]
                end
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
                # Current price in whole cents, read from the `price` form field.
                #
                # The product is rounded rather than truncated: binary floats often
                # land just below the intended value (19.99 * 100 is
                # 1998.9999999999998), so truncation would lose a cent.
                emits :price_in_cents do
                  dollars = product_details_form('price').to_f
                  (dollars * 100).round
                end
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
                # Regular (pre-discount) price in whole cents.
                #
                # When a limited time offer is shown, the value is parsed from the info
                # cell line following 'Was:' and rounded to an Integer, so both branches
                # return the same type and float error cannot leak into the result.
                # Falls back to price_in_cents when there is no offer, or when the
                # 'Was:' line cannot be located (previously a NoMethodError on nil).
                emits :regular_price_in_cents do
                  was_line = has_limited_time_offer ? info_cell_line_after('Was:') : nil
                  if was_line
                    (was_line.sub('$ ', '').to_f * 100).round
                  else
                    price_in_cents
                  end
                end
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                # Difference between the regular price and the current price, in cents.
                emits :limited_time_offer_savings_in_cents do
                  regular = regular_price_in_cents
                  current = price_in_cents
                  regular - current
                end
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
                # End date of the limited time offer, or nil when no offer is shown.
                # NOTE(review): reads the line after the first 'Until' in the info
                # cell, as bonus_reward_miles_ends_on does -- confirm a page never
                # carries both offers.
                emits :limited_time_offer_ends_on do
                  CrawlKit::FastDateHelper[info_cell_line_after('Until')] if has_limited_time_offer
                end
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
                # Number of bonus reward miles (the line after 'Earn'), or 0 when the
                # page shows no bonus miles offer.
                emits :bonus_reward_miles do
                  has_bonus_reward_miles ? info_cell_line_after('Earn').to_i : 0
                end
         
     | 
| 
      
 53 
     | 
    
         
            +
             
     | 
| 
      
 54 
     | 
    
         
            +
                # End date of the bonus reward miles offer, or nil when none is shown.
                # NOTE(review): reads the line after the first 'Until' in the info
                # cell, as limited_time_offer_ends_on does -- confirm a page never
                # carries both offers.
                emits :bonus_reward_miles_ends_on do
                  ends_on = nil
                  ends_on = CrawlKit::FastDateHelper[info_cell_line_after('Until')] if has_bonus_reward_miles
                  ends_on
                end
         
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
      
 62 
     | 
    
         
            +
                # Value of the `stock type` field in the product details form.
                emits :stock_type do
                  field = 'stock type'
                  product_details_form(field)
                end
         
     | 
| 
      
 65 
     | 
    
         
            +
             
     | 
| 
      
 66 
     | 
    
         
            +
                # First comma-separated segment of the stock category, stripped;
                # nil when there is no stock category or no such segment.
                emits :primary_category do
                  category = stock_category
                  segment = category && category.split(',')[0]
                  segment && segment.strip
                end
         
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
      
 73 
     | 
    
         
            +
                # Second comma-separated segment of the stock category, stripped;
                # nil when there is no stock category or no second segment.
                emits :secondary_category do
                  category = stock_category
                  segment = category && category.split(',')[1]
                  segment && segment.strip
                end
         
     | 
| 
      
 79 
     | 
    
         
            +
             
     | 
| 
      
 80 
     | 
    
         
            +
                # Place of origin taken from the 'Made in: ' info line, normalised by a
                # fixed list of text substitutions, then de-duplicated across its
                # comma-separated parts. Returns nil when no such line exists.
                emits :origin do
                  line = find_info_line(/\AMade in: /)
                  if line
                    # Applied in order; each pair is [text to find, replacement].
                    substitutions = [
                      ['Made in: ', ''],
                      ['/Californie', ''],
                      ['Bosnia\'Hercegovina', 'Bosnia and Herzegovina'],
                      ['Is. Of', 'Island of'],
                      ['Italy Quality', 'Italy'],
                      ['Usa-', ''],
                      [', Rep. Of', ''],
                      ['&', 'and']
                    ]
                    place = line
                    substitutions.each do |pair|
                      place = place.gsub(pair[0], pair[1])
                    end
                    parts = place.split(',').map { |part| part.strip }
                    parts.uniq.join(', ')
                  end
                end
         
     | 
| 
      
 95 
     | 
    
         
            +
             
     | 
| 
      
 96 
     | 
    
         
            +
                # Package description from the third info cell line with the first '|'
                # removed, or nil when that line holds the price instead.
                emits :package do
                  @package ||= begin
                    third_line = info_cell_lines[2]
                    if third_line.include?('Price: ')
                      nil
                    else
                      third_line.sub('|','').strip
                    end
                  end
                end
         
     | 
| 
      
 102 
     | 
    
         
            +
             
     | 
| 
      
 103 
     | 
    
         
            +
                # Unit type reported by the volume helper for this package.
                emits :package_unit_type do
                  helper = volume_helper
                  helper.unit_type
                end
         
     | 
| 
      
 106 
     | 
    
         
            +
             
     | 
| 
      
 107 
     | 
    
         
            +
                # Per-unit volume reported by the volume helper.
                emits :package_unit_volume_in_milliliters do
                  helper = volume_helper
                  helper.unit_volume
                end
         
     | 
| 
      
 110 
     | 
    
         
            +
             
     | 
| 
      
 111 
     | 
    
         
            +
                # Number of units in the package, as reported by the volume helper.
                emits :total_package_units do
                  helper = volume_helper
                  helper.total_units
                end
         
     | 
| 
      
 114 
     | 
    
         
            +
             
     | 
| 
      
 115 
     | 
    
         
            +
                # Whole-package volume reported by the volume helper.
                emits :total_package_volume_in_milliliters do
                  helper = volume_helper
                  helper.package_volume
                end
         
     | 
| 
      
 118 
     | 
    
         
            +
             
     | 
| 
      
 119 
     | 
    
         
            +
                # Result of CrawlKit::VolumeHelper[] applied to the package description.
                emits :volume_in_milliliters do
                  description = package
                  CrawlKit::VolumeHelper[description]
                end
         
     | 
| 
      
 122 
     | 
    
         
            +
             
     | 
| 
      
 123 
     | 
    
         
            +
                # Alcohol by volume in hundredths of a percent (e.g. 12.5% => 1250),
                # parsed from the info line ending in ' Alcohol/Vol.'. Returns nil when
                # the line is missing or the parsed value is zero.
                #
                # The scaled value is rounded rather than truncated: 4.35 * 100 is
                # 434.99999999999994 in binary floating point, so to_i would give 434.
                emits :alcohol_content do
                  match = find_info_line(/ Alcohol\/Vol.\Z/)
                  if match
                    ac = match.gsub(/%| Alcohol\/Vol./, '').to_f
                    ac.zero? ? nil : (ac * 100).round
                  end
                end
         
     | 
| 
      
 130 
     | 
    
         
            +
             
     | 
| 
      
 131 
     | 
    
         
            +
                # Text following 'Sugar Content : ' on its info line, or nil when the
                # page has no such line.
                emits :sugar_content do
                  line = find_info_line(/\ASugar Content : /)
                  line && line.gsub('Sugar Content : ', '')
                end
         
     | 
| 
      
 137 
     | 
    
         
            +
             
     | 
| 
      
 138 
     | 
    
         
            +
                # Producer name from the 'By: ' info line, with the label and any
                # trailing section headings removed, then title-cased. nil when the
                # line is absent.
                emits :producer_name do
                  line = find_info_line(/\ABy: /)
                  if line
                    cleaned = line.gsub(/By: |Tasting Note|Serving Suggestion|NOTE:/, '')
                    CrawlKit::TitleCaseHelper[cleaned]
                  end
                end
         
     | 
| 
      
 146 
     | 
    
         
            +
             
     | 
| 
      
 147 
     | 
    
         
            +
                # Release date parsed from the line after 'Release Date:'. nil when the
                # page has no release date label or the value is 'N/A'.
                emits :released_on do
                  released = nil
                  if html.include?('Release Date:')
                    raw_date = info_cell_line_after('Release Date:')
                    released = CrawlKit::FastDateHelper[raw_date] unless raw_date == 'N/A'
                  end
                  released
                end
         
     | 
| 
      
 155 
     | 
    
         
            +
             
     | 
| 
      
 156 
     | 
    
         
            +
                # True when the page HTML contains the discontinued notice.
                emits :is_discontinued do
                  marker = 'PRODUCT DISCONTINUED'
                  html.include?(marker)
                end
         
     | 
| 
      
 159 
     | 
    
         
            +
             
     | 
| 
      
 160 
     | 
    
         
            +
                # True when the page HTML contains the limited time offer heading.
                emits :has_limited_time_offer do
                  marker = '<B>Limited Time Offer</B>'
                  html.include?(marker)
                end
         
     | 
| 
      
 163 
     | 
    
         
            +
             
     | 
| 
      
 164 
     | 
    
         
            +
                # True when the page HTML contains the bonus reward miles heading.
                emits :has_bonus_reward_miles do
                  marker = '<B>Bonus Reward Miles Offer</B>'
                  html.include?(marker)
                end
         
     | 
| 
      
 167 
     | 
    
         
            +
             
     | 
| 
      
 168 
     | 
    
         
            +
                # True when the page HTML contains the seasonal/limited quantities banner.
                emits :is_seasonal do
                  marker = '<font color="#ff0000">SEASONAL/LIMITED QUANTITIES</font>'
                  html.include?(marker)
                end
         
     | 
| 
      
 171 
     | 
    
         
            +
             
     | 
| 
      
 172 
     | 
    
         
            +
                # True when the page HTML contains the VQA wine notice.
                emits :is_vqa do
                  marker = 'This is a <B>VQA</B> wine'
                  html.include?(marker)
                end
         
     | 
| 
      
 175 
     | 
    
         
            +
             
     | 
| 
      
 176 
     | 
    
         
            +
                # Text captured after the 'Description' heading in the raw HTML, or nil
                # when the heading is absent or the surrounding markup does not match.
                emits :description do
                  found = nil
                  if html.include?('<B>Description</B>')
                    found = html.match(/<B>Description<\/B><\/font><BR>\n\t\t\t(.*)<BR>\n\t\t\t<BR>/m)
                  end
                  found ? found.captures[0] : nil
                end
         
     | 
| 
      
 184 
     | 
    
         
            +
             
     | 
| 
      
 185 
     | 
    
         
            +
                # Text captured after the 'Serving Suggestion' heading in the raw HTML,
                # or nil when the heading is absent or the markup does not match.
                emits :serving_suggestion do
                  found = nil
                  if html.include?('<B>Serving Suggestion</B>')
                    found = html.match(/<B>Serving Suggestion<\/B><\/font><BR>\n\t\t\t(.*)<BR><BR>/m)
                  end
                  found ? found.captures[0] : nil
                end
         
     | 
| 
      
 193 
     | 
    
         
            +
             
     | 
| 
      
 194 
     | 
    
         
            +
                # Text captured after the 'Tasting Note' heading in the raw HTML, or nil
                # when the heading is absent or the surrounding markup does not match.
                emits :tasting_note do
                  found = nil
                  if html.include?('<B>Tasting Note</B>')
                    found = html.match(/<B>Tasting Note<\/B><\/font><BR>\n\t\t\t(.*)<BR>\n\t\t\t<BR>/m)
                  end
                  found ? found.captures[0] : nil
                end
         
     | 
| 
      
 202 
     | 
    
         
            +
             
     | 
| 
      
 203 
     | 
    
         
            +
                private
         
     | 
| 
      
 204 
     | 
    
         
            +
             
     | 
| 
      
 205 
     | 
    
         
            +
                # Memoized CrawlKit::VolumeHelper built from the package description.
                def volume_helper
                  return @volume_helper if @volume_helper
                  @volume_helper = CrawlKit::VolumeHelper.new(package)
                end
         
     | 
| 
      
 208 
     | 
    
         
            +
             
     | 
| 
      
 209 
     | 
    
         
            +
                # True unless the third info cell line contains 'Price:'.
                def has_package?
                  third_line = info_cell_lines[2]
                  !third_line.include?('Price:')
                end
         
     | 
| 
      
 212 
     | 
    
         
            +
             
     | 
| 
      
 213 
     | 
    
         
            +
                # First raw info line at indent offset 12 that is neither blank nor one
                # of the known non-category lines (price, offers, notes), stripped.
                # Returns nil when no such line exists.
                def stock_category
                  excluded = [
                    'Price:',
                    'Bonus Reward Miles Offer',
                    'Value Added Promotion',
                    'Limited Time Offer',
                    'NOTE:'
                  ]
                  candidate = get_info_lines_at_offset(12).find do |line|
                    text = line.strip
                    text != '' && excluded.none? { |marker| text.include?(marker) }
                  end
                  candidate ? candidate.strip : nil
                end
         
     | 
| 
      
 225 
     | 
    
         
            +
             
     | 
| 
      
 226 
     | 
    
         
            +
                # Returns, as a String, the `value` attribute of the input called
                # +name+ inside the `productdetails` form.
                def product_details_form(name)
                  selector = "form[name=\"productdetails\"] input[name=\"#{name}\"]"
                  input = doc.css(selector)[0]
                  input.attributes['value'].to_s
                end
         
     | 
| 
      
 230 
     | 
    
         
            +
             
     | 
| 
      
 231 
     | 
    
         
            +
                # Selects the raw info cell lines whose leading whitespace run is
                # exactly +offset+ characters long. Lines with no leading whitespace
                # are never selected.
                def get_info_lines_at_offset(offset)
                  raw_info_cell_lines.select do |line|
                    leading = line[/\A[\s]+/]
                    !leading.nil? && leading.size == offset
                  end
                end
         
     | 
| 
      
 237 
     | 
    
         
            +
             
     | 
| 
      
 238 
     | 
    
         
            +
                # Memoized info cell lines joined with newlines.
                def info_cell_text
                  return @info_cell_text if @info_cell_text
                  @info_cell_text = info_cell_lines.join("\n")
                end
         
     | 
| 
      
 241 
     | 
    
         
            +
             
     | 
| 
      
 242 
     | 
    
         
            +
                # Returns the first info cell line matching +regexp+, or nil.
                def find_info_line(regexp)
                  info_cell_lines.find { |line| line =~ regexp }
                end
         
     | 
| 
      
 245 
     | 
    
         
            +
             
     | 
| 
      
 246 
     | 
    
         
            +
                # Memoized text content of the info cell element, split on newlines
                # (lines are left unstripped).
                def raw_info_cell_lines
                  return @raw_info_cell_lines if @raw_info_cell_lines
                  text = info_cell_element.content
                  @raw_info_cell_lines = text.split(/\n/)
                end
         
     | 
| 
      
 249 
     | 
    
         
            +
             
     | 
| 
      
 250 
     | 
    
         
            +
                # Memoized info cell lines: each raw line stripped, empty ones dropped.
                def info_cell_lines
                  @info_cell_lines ||= raw_info_cell_lines.map(&:strip).reject(&:empty?)
                end
         
     | 
| 
      
 255 
     | 
    
         
            +
             
     | 
| 
      
 256 
     | 
    
         
            +
                # Returns the info cell line immediately following the first line equal
                # to +item+, or nil when +item+ is not present (or is the last line).
                def info_cell_line_after(item)
                  lines = info_cell_lines
                  position = lines.index(item)
                  position ? lines[position + 1] : nil
                end
         
     | 
| 
      
 261 
     | 
    
         
            +
             
     | 
| 
      
 262 
     | 
    
         
            +
                # Memoized inner HTML of the info cell element.
                def info_cell_html
                  return @info_cell_html if @info_cell_html
                  @info_cell_html = info_cell_element.inner_html
                end
         
     | 
| 
      
 265 
     | 
    
         
            +
             
     | 
| 
      
 266 
     | 
    
         
            +
                # First node matched by the CSS selector for the product info cell.
                def info_cell_element
                  selector = 'table[width="478"] td[height="271"] td[colspan="2"].main_font'
                  doc.css(selector)[0]
                end
         
     | 
| 
      
 269 
     | 
    
         
            +
             
     | 
| 
      
 270 
     | 
    
         
            +
                # Raises CrawlKit::MalformedDocumentError when the page has a package
                # line (third info cell line) that does not begin with '|'.
                # Returns nil otherwise.
                def verify_third_info_cell
                  if has_package?
                    third_line = info_cell_lines[2]
                    unless third_line[0,1] == '|'
                      raise CrawlKit::MalformedDocumentError,
                        "Expected third line in info cell to begin with bar. LCBO No: " \
                        "#{product_no}, Dump: #{third_line.inspect}"
                    end
                  end
                end
         
     | 
| 
      
 276 
     | 
    
         
            +
             
     | 
| 
      
 277 
     | 
    
         
            +
                # Verification hook: raises CrawlKit::MissingResourceError when the
                # response HTML is empty or whitespace only; otherwise returns nil.
                def verify_response_not_blank
                  if html.strip.empty?
                    raise CrawlKit::MissingResourceError,
                      "product #{product_no} does not appear to exist"
                  end
                end
         
     | 
| 
      
 282 
     | 
    
         
            +
             
     | 
| 
      
 283 
     | 
    
         
            +
                # Verification hook: raises CrawlKit::MissingResourceError when the
                # 'itemName' value of the product details form is blank; otherwise
                # returns nil.
                def verify_product_name
                  item_name = product_details_form('itemName').strip
                  if item_name.empty?
                    raise CrawlKit::MissingResourceError,
                      "can not locate name for product #{product_no}"
                  end
                end
         
     | 
| 
      
 288 
     | 
    
         
            +
             
     | 
| 
      
 289 
     | 
    
         
            +
                # Verification hook: raises CrawlKit::MalformedDocumentError when the
                # document contains no form named "productdetails"; otherwise returns nil.
                def verify_product_details_form
                  forms = doc.css('form[name="productdetails"]')
                  if forms.empty?
                    raise CrawlKit::MalformedDocumentError,
                      "productdetails form not found in doc for product #{product_no}"
                  end
                end
         
     | 
| 
      
 294 
     | 
    
         
            +
             
     | 
| 
      
 295 
     | 
    
         
            +
              end
         
     | 
| 
      
 296 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,196 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'cgi'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module LCBO
         
     | 
| 
      
 4 
     | 
    
         
            +
              # Page definition for a single LCBO store information page.
              #
              # Fields are declared with the CrawlKit::Page DSL (`uri`, `on`, `emits`)
              # and are read out of the store info table in the fetched document.
              class StorePage

                include CrawlKit::Page

                uri 'http://www.lcbo.com/lcbo-ear/jsp/storeinfo.jsp?' \
                    'STORE={store_no}&language=EN'

                # Lower-case day names; each one yields a "<day>_open" and a
                # "<day>_close" field below.
                DAY_NAMES = %w[
                  monday
                  tuesday
                  wednesday
                  thursday
                  friday
                  saturday
                  sunday ].freeze

                # Maps each boolean detail field to the term searched for in the
                # HTML of the checkbox rows (see #details).
                DETAIL_FIELDS = {
                  :has_wheelchair_accessability => 'wheelchair',
                  :has_bilingual_services       => 'bilingual',
                  :has_product_consultant       => 'consultant',
                  :has_tasting_bar              => 'tasting',
                  :has_beer_cold_room           => 'cold',
                  :has_special_occasion_permits => 'permits',
                  :has_vintages_corner          => 'vintages',
                  :has_parking                  => 'parking',
                  :has_transit_access           => 'transit' }.freeze

                on :before_parse, :verify_store_returned
                on :after_parse,  :verify_node_count
                on :after_parse,  :verify_telephone_number

                emits :store_no do
                  query_params[:store_no].to_i
                end

                DAY_NAMES.each do |day|
                  emits :"#{day}_open" do
                    time_open_close(day)[0]
                  end

                  emits :"#{day}_close" do
                    time_open_close(day)[1]
                  end
                end

                emits :name do
                  CrawlKit::TitleCaseHelper[info_nodes[1].content.strip]
                end

                # Text before the first comma of the third info node.
                emits :address_line_1 do
                  data = info_nodes[2].content.strip.split(',')[0]
                  unless data
                    raise CrawlKit::MalformedDocumentError,
                      "unable to locate address for store #{store_no}"
                  end
                  CrawlKit::TitleCaseHelper[squish(data)]
                end

                # Text between the first and second comma of the third info node,
                # or nil when there is none.
                emits :address_line_2 do
                  data = info_nodes[2].content.strip.split(',')[1]
                  CrawlKit::TitleCaseHelper[squish(data)] if data
                end

                emits :city do
                  data = info_nodes[3].content.strip.split(',')[0]
                  CrawlKit::TitleCaseHelper[squish(data)] if data
                end

                emits :postal_code do
                  data = info_nodes[3].content.strip.split(',')[1]
                  unless data
                    raise CrawlKit::MalformedDocumentError,
                      "unable to locate postal code for store #{store_no}"
                  end
                  squish(data).upcase
                end

                emits :telephone do
                  info_nodes[4].content.
                    gsub(/[\n\r\t]+/, ' ').
                    gsub('Telephone:', '').
                    strip
                end

                # nil when the page shows no fax line.
                emits :fax do
                  if has_fax?
                    info_nodes[5].content.gsub(/[\n\r\t]+/, ' ').gsub('Fax:', '').strip
                  end
                end

                emits :latitude do
                  location['latitude'][0].to_f
                end

                emits :longitude do
                  location['longitude'][0].to_f
                end

                DETAIL_FIELDS.keys.each do |field|
                  emits(field) { details[field] }
                end

                protected

                # Replaces every run of newlines, carriage returns and tabs in +text+
                # with a single space and strips leading/trailing whitespace.
                def squish(text)
                  text.gsub(/[\n\r\t]+/, ' ').strip
                end

                # Inner HTML of the grandparent element of every checkbox input in the
                # document (memoized).
                def detail_rows
                  @detail_rows ||= doc.css('input[type="checkbox"]').map do |checkbox|
                    checkbox.parent.parent.inner_html
                  end
                end

                # Hash of DETAIL_FIELDS keys to booleans (memoized). A field is true
                # when the first detail row mentioning its term also contains the text
                # 'checked'.
                #
                # Raises CrawlKit::MalformedDocumentError when no row mentions a term,
                # instead of failing with NoMethodError on nil.
                def details
                  @details ||= DETAIL_FIELDS.reduce({}) do |hsh, (field, term)|
                    matching_row = detail_rows.detect { |row_html| row_html.include?(term) }
                    unless matching_row
                      raise CrawlKit::MalformedDocumentError,
                        "unable to locate #{term} detail row for store #{store_no}"
                    end
                    hsh.merge(field => matching_row.include?('checked'))
                  end
                end

                # href of the first anchor in the map node; the node sits one position
                # later when a fax line is present.
                def map_anchor_href
                  info_nodes[has_fax? ? 6 : 5].css('a').first.attributes['href'].to_s
                end

                # Query string of the map link parsed into a Hash of name => [values].
                def location
                  CGI.parse(URI.parse(map_anchor_href).query)
                end

                def has_fax?
                  info_nodes.to_s.include?('Fax: ')
                end

                # [open, close] pair for +day+ (Symbol or String, any case), or nil
                # when the day was not found in the hours table.
                def time_open_close(day)
                  open_close_times[day.to_s.downcase]
                end

                # Hash of lower-case day name => [open, close] (memoized). Each time
                # found in a row is converted to hour * 60 + minute. Rows whose open
                # and close values are equal (including rows with no times at all)
                # map to [nil, nil].
                def open_close_times
                  @open_close_times ||= time_cells.inject({}) do |hsh, row|
                    text  = row.text.gsub(/\s+/, ' ')
                    day   = text.match(/[MTWTFS]{1}[a-z]+/).to_s.downcase
                    times = text.scan(/[0-9]{1,2}:[0-9]{2}/)
                    open, close = *times.map { |time|
                      hour, min = *time.split(':').map { |part| part.to_i }
                      (hour * 60) + min
                    }
                    hsh.merge(day => (open == close ? [nil, nil] : [open, close]))
                  end
                end

                # Uses the +doc+ reader, like #detail_rows, rather than touching the
                # instance variable directly.
                def container_table
                  doc.css('table.border[width="478"]')
                end

                def hours_table
                  container_table.css('table[width="100%"]')
                end

                def info_nodes
                  container_table.css('td[width="48%"]')
                end

                # Rows of the hours table whose markup contains a day name.
                def time_cells
                  hours_table.
                    css('td[width="50%"] tr').
                    select { |row| row.to_s =~ /[MTWTFS]{1}[onuesdhriat]{2,5}day/ }
                end

                # The fax line adds one extra info node.
                def expected_node_count
                  has_fax? ? 8 : 7
                end

                # before_parse hook: raises CrawlKit::MissingResourceError when the
                # response carries the "no stores" notice.
                def verify_store_returned
                  return unless @html.include?('No stores were located using your criteria.')
                  raise CrawlKit::MissingResourceError, "store #{store_no} does not exist"
                end

                # after_parse hook: raises CrawlKit::MalformedDocumentError when the
                # telephone field is blank. A bare truthiness test would never fire
                # because the field is always a (possibly empty) String.
                def verify_telephone_number
                  return unless telephone.to_s.strip.empty?
                  raise CrawlKit::MalformedDocumentError,
                    "unable to locate telephone number for store #{store_no}"
                end

                # after_parse hook: raises CrawlKit::MalformedDocumentError when the
                # number of info nodes differs from #expected_node_count.
                def verify_node_count
                  return if expected_node_count == info_nodes.size
                  raise CrawlKit::MalformedDocumentError,
                    "Expected #{expected_node_count} nodes for store #{store_no} but found " \
                    "#{info_nodes.size} instead."
                end

              end
         
     | 
| 
      
 196 
     | 
    
         
            +
            end
         
     |