joyceshop 0.0.2 → 0.0.3

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f8dbce4f8ea05d23d63f3d9c49bc5819afeb7bd7
4
- data.tar.gz: ddc747fbfb484c3f4dc4f8d3e513153a243bd01c
3
+ metadata.gz: 2c4b143619c3fe55c48d39a9b419f39d6923fce0
4
+ data.tar.gz: 1b0fd41b0f9aff1067b680b43e2260a3f7f83cae
5
5
  SHA512:
6
- metadata.gz: ed50d49f299ae7bf30e8f86f3e9284d4d9dd0eaebe325b0a9f7f3b01664e652b57faf20b57b05b17ab1b26ba6ac0149dd43693927e49890ab1b8db57e0ef1bf4
7
- data.tar.gz: c7cd6da017475ef2f3b34d92774407f3fc7916f97c416ab9b0094488f605a302d38b554096f798dd6fda6f88aac53dc2ed82f53fffc0a546fa569caddfac6740
6
+ metadata.gz: 1b7810cbdc56d47096794d986417082dbe14dc84122e8d0539dbbc2e1f0e4c243f98ba451be45642fafb93fc0f053c0c258e8e4c6fcc6b288caded95c1b961c7
7
+ data.tar.gz: bd71e91f8adda7003cd73357e1eb2495c4e262d30e39169242bca461f4baaf0caffc5925849c0a5526f3a9fc3c438e2c05b3df9eb72170b1036e40ef063aa02f
@@ -2,5 +2,6 @@
2
2
  # require 'joyceshop' # for production
3
3
  require_relative '../lib/joyceshop.rb' # for testing
4
4
 
5
- scraper = JoyceShop::Scraper.new()
5
+ scraper = JoyceShop::Scraper.new
6
6
  puts scraper.search('紗針織衫', {price_boundary: [100, 443]})
7
+ puts scraper.scrape(:tops, 1)
@@ -1,10 +1,14 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'oga'
3
+ require 'uri'
3
4
  require 'open-uri'
4
5
 
5
6
  # scrape data
6
7
  module JoyceShop
7
8
  class Scraper
9
+ # Types
10
+ @@VALID_TYPES = [:tops, :popular, :pants, :pants, :accessories, :latest]
11
+
8
12
  # URI
9
13
  @@BASE_URI = 'https://www.joyce-shop.com'
10
14
  @@LATEST_URI = "#{@@BASE_URI}/PDList.asp?brand=01&item1=&item2=&ya19=&keyword=&recommand=1412170001&ob=F"
@@ -12,11 +16,12 @@ module JoyceShop
12
16
  @@TOPS_URI = "#{@@BASE_URI}/PDList.asp?brand=01&item1=110&item2=111&ya19=&keyword=&recommand=&ob=F"
13
17
  @@PANTS_URI = "#{@@BASE_URI}/PDList.asp?brand=01&item1=120&item2=121&ya19=&keyword=&recommand=&ob=F"
14
18
  @@ACCESSORIES_URI = "#{@@BASE_URI}/PDList.asp?brand=01&item1=140&item2=141&ya19=&keyword=&recommand=&ob=F"
19
+ @@SEARCH_URI = "#{@@BASE_URI}/PDList.asp?"
15
20
 
16
21
  # Selectors
17
22
  @@ITEM_SELECTOR = "//div[contains(@class, 'NEW_shop_list')]/ul/li/div[contains(@class, 'NEW_shop_list_pic')]"
18
- @@LINK_SELECTOR = 'a'
19
- @@IMAGE_SELECTOR = "a/img[contains(@class, 'lazyload')]"
23
+ @@LINK_SELECTOR = 'a[1]/@href'
24
+ @@IMAGE_SELECTOR = "a/img[contains(@class, 'lazyload')]/@src"
20
25
  @@ITEM_INFO_SELECTOR = "div[contains(@class, 'NEW_shop_list_info')]"
21
26
  @@TITLE_SELECTOR = "#{@@ITEM_INFO_SELECTOR}/div[1]"
22
27
  @@PRICE_SELECTOR = "#{@@ITEM_INFO_SELECTOR}/span"
@@ -58,6 +63,19 @@ module JoyceShop
58
63
  data = parse_html(body)
59
64
  filter(data, options)
60
65
  end
66
+
67
+ def search(keyword, options={})
68
+ uri = uri_with_search(keyword)
69
+ body = fetch_data(uri)
70
+ data = parse_html(body)
71
+ filter(data, options)
72
+ end
73
+
74
+ def scrape(type, page, options = {})
75
+ abort "only supports #{@@VALID_TYPES}" unless @@VALID_TYPES.include?(type.to_sym)
76
+
77
+ method = self.method(type)
78
+ method.call(page, options)
61
79
  end
62
80
 
63
81
  private
@@ -65,6 +83,10 @@ module JoyceShop
65
83
  "#{uri}&pageno=#{page}"
66
84
  end
67
85
 
86
+ def uri_with_search(keyword)
87
+ "#{@@SEARCH_URI}keyword=#{URI.escape(keyword)}"
88
+ end
89
+
68
90
  def fetch_data(uri)
69
91
  open(uri) { |file| file.read }
70
92
  end
@@ -116,13 +138,13 @@ module JoyceShop
116
138
  end
117
139
 
118
140
  def extract_images(item)
119
- image = item.xpath(@@IMAGE_SELECTOR).attribute(:src).first.value
141
+ image = item.xpath(@@IMAGE_SELECTOR).text
120
142
  image_hover = image.sub(/\.jpg/, '-h.jpg')
121
143
  ["#{@@BASE_URI}#{image}", "#{@@BASE_URI}#{image_hover}"]
122
144
  end
123
145
 
124
146
  def extract_link(item)
125
- "#{@@BASE_URI}/#{item.xpath(@@LINK_SELECTOR).attribute(:href).first.value}"
147
+ "#{@@BASE_URI}/#{item.xpath(@@LINK_SELECTOR).text}"
126
148
  end
127
149
  end
128
150
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: joyceshop
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Even Chang
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2016-01-03 00:00:00.000000000 Z
14
+ date: 2016-01-04 00:00:00.000000000 Z
15
15
  dependencies: []
16
16
  description: This is a gem scraping joyceshop's website and returns the popular/latest
17
17
  items