stylemooncat 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2c070bc22fcc1c2130dd9b0169ee726ddc87a0e0
4
- data.tar.gz: cd7ca1c41bc4b3d5db36e9100e545e4ad4f8b975
3
+ metadata.gz: 37605c582c4d3a6f49efc9a54d55c85a477d82d4
4
+ data.tar.gz: a9c7415659c0a44728be90e95b5fc1d18a3ad50b
5
5
  SHA512:
6
- metadata.gz: 121004971b1fc756c1d6b7c18feea3053896da755c3805f53dfeae9df005dd82eda5af4cddb295db9295919e47ab373bf9efeb7d31540feaa9a9aa6858c664b5
7
- data.tar.gz: a2929d18cc6811d2f9ce4c7243379a58cf329097bce17cc8920e1927a293618a22327eac2fa2b1100da9626dd493ef8d587513fe471cedf8b6c609abe4c5d686
6
+ metadata.gz: 0d9b67cb94c808f8109e4aafa78906137b91dfda2ecc4da2bf7067c47a5cb992d71867e6e79d62c4bf720eacff7f3fcdd962061c99bb8c4a8364db67d6a0cedb
7
+ data.tar.gz: 56d7d6788c4a08e0bf6bec75c63f1863ccf575624f0c815a915242c3e52e883b48e97d02ecfb952477357ad6c2cbaccbd805851f0c37322c22394841c1610999
data/bin/stylemooncat CHANGED
@@ -4,3 +4,4 @@ require_relative '../lib/stylemooncat.rb'
4
4
 
5
5
  @scraper = StyleMoonCat::Scraper.new
6
6
  puts @scraper.scrape(ARGV[0],ARGV[1],ARGV[2],ARGV[3],ARGV[4])
7
+ #puts @scraper.scrape("shoes",{:keyword=>"none",:page_limit=>3,:price_boundary=>[0,600]})
@@ -56,92 +56,155 @@ module StyleMoonCat
56
56
  # Regular ?
57
57
  @@TITLE_REGEX = /([.\p{Han}[a-zA-Z]]+)/
58
58
 
59
- def scrape(category,page,keyword,price_from,price_to)
60
- case category
61
- when "newarrival"
62
- uri = uri_with_page(@@NEW_ARRIVALS_URI, page)
63
- when "lastweek"
64
- uri = uri_with_page(@@LAST_WEEK_URI, page)
65
- when "specialdiscount"
66
- uri = uri_with_page(@@SPECIAL_DISCOUNT_URI, page)
67
- when "top"
68
- uri = uri_with_page(@@TOP_URI, page)
69
- when "top_clothes"
70
- uri = uri_with_page(@@TOPS_Clothes_URI, page)
71
- when "top_Tshirt"
72
- uri = uri_with_page(@@TOPS_Tshirt_URI, page)
73
- when "top_vest"
74
- uri = uri_with_page(@@TOPS_Vest_URI, page)
75
- when "top_blouse"
76
- uri = uri_with_page(@@TOPS_Blouse_URI, page)
77
- when "top_knit"
78
- uri = uri_with_page(@@TOPS_Knit_URI, page)
79
- when "bottom"
80
- uri = uri_with_page(@@BOTTOM_URI, page)
81
- when "bottom_pants"
82
- uri = uri_with_page(@@BOTTOM_Pants_URI, page)
83
- when "bottom_skirts"
84
- uri = uri_with_page(@@BOTTOM_Skirts_URI, page)
85
- when "outer"
86
- uri = uri_with_page(@@OUTER_URI, page)
87
- when "outer_coat"
88
- uri = uri_with_page(@@OUTER_Coat_URI, page)
89
- when "outer_jacket"
90
- uri = uri_with_page(@@OUTER_Jacket_URI, page)
91
- when "outer_knit"
92
- uri = uri_with_page(@@OUTER_Knit_URI, page)
93
- when "outer_vest"
94
- uri = uri_with_page(@@OUTER_Vest_URI, page)
95
-
96
- when "dress"
97
- uri = uri_with_page(@@DRESS_URI, page)
98
- when "shoes_and_bag"
99
- uri = uri_with_page(@@SHOES_AND_BAGS_URI, page)
100
- when "shose"
101
- uri = uri_with_page(@@SHOES_URI, page)
102
- when "bag"
103
- uri = uri_with_page(@@BAG_URI, page)
104
- when "accessories"
105
- uri = uri_with_page(@@ACCESSORIES_URI, page)
106
- when "accessories_watch"
107
- uri = uri_with_page(@@ACCESSORIES_Watch_URI, page)
108
- when "accessories_necklace"
109
- uri = uri_with_page(@@ACCESSORIES_Necklace_URI, page)
110
- when "accessories_ring"
111
- uri = uri_with_page(@@ACCESSORIES_Ring_URI, page)
112
- when "accessories_bracelet"
113
- uri = uri_with_page(@@ACCESSORIES_Bracelet_URI, page)
114
- when "accessories_earring"
115
- uri = uri_with_page(@@ACCESSORIES_Earring_URI, page)
116
- when "accessories_muffler"
117
- uri = uri_with_page(@@ACCESSORIES_Muffler_URI, page)
118
- when "accessories_belt"
119
- uri = uri_with_page(@@ACCESSORIES_Belt_URI, page)
120
- when "accessories_haircap"
121
- uri = uri_with_page(@@ACCESSORIES_Haircap_URI, page)
122
- when "accessories_glasses"
123
- uri = uri_with_page(@@ACCESSORIES_Glasses_URI, page)
124
- when "accessories_socks"
125
- uri = uri_with_page(@@ACCESSORIES_Socks_URI, page)
126
- when "accessories_underwear"
127
- uri = uri_with_page(@@ACCESSORIES_Underwear_URI, page)
128
- when "accessories_others"
129
- uri = uri_with_page(@@ACCESSORIES_Others_URI, page)
59
+ @@IsScrpeColor=0;
60
+
61
+ @@COLOR_ITEM_XPATH = "//option"
62
+
63
+ def scrape_contain_color(category,options)
64
+ @@IsScrapeColor=1
65
+ filter_results = scrape(category,options)
66
+ filter_results_with_color = filter_results.each do |x|
67
+ # puts x[:link]
68
+ body = fetch_data(x[:link])
69
+ color = color_extract(body)
70
+ x[:colors]= color
71
+ end
72
+
73
+ return filter_results_with_color
74
+ end
75
+
76
+ def color_extract(raw)
77
+ # puts Oga.parse_html(raw).xpath(@@ITEM_XPATH).map { |item| parse(item) }
78
+ result = Oga.parse_html(raw)
79
+ .xpath(@@COLOR_ITEM_XPATH)
80
+ .select { |item| item.text.length >4 }
81
+ .map { |item| color_parse(item) }
82
+ .uniq
83
+ return result
84
+ end
85
+
86
+ def color_parse(item)
87
+ item.text.split(" ")[0].split(":")[1]
88
+ end
89
+
90
+ def scrape(category,options)
91
+ keyword= options[:keyword]
92
+ page_limit=options[:page_limit]
93
+ puts options
94
+
95
+ if options[:price_boundary]!= nil && options[:price_boundary].length ==2
96
+ if options[:price_boundary][0].to_i>options[:price_boundary][1].to_i
97
+ price_from = options[:price_boundary][1]
98
+ price_to = options[:price_boundary][0]
99
+ else
100
+ price_from = options[:price_boundary][0]
101
+ price_to = options[:price_boundary][1]
102
+ end
130
103
  else
131
- uri = uri_with_page(@@ALL_ITEMS_URI, page)
104
+ price_from = -1
105
+ price_to = -1
132
106
  end
107
+ @filter_results=[]
108
+ @count=1
109
+ 1.upto(page_limit) do
110
+ page = @count
111
+ case category
112
+ when "newarrival"
113
+ uri = uri_with_page(@@NEW_ARRIVALS_URI, page)
114
+ when "lastweek"
115
+ uri = uri_with_page(@@LAST_WEEK_URI, page)
116
+ when "specialdiscount"
117
+ uri = uri_with_page(@@SPECIAL_DISCOUNT_URI, page)
118
+ when "top"
119
+ uri = uri_with_page(@@TOP_URI, page)
120
+ when "top_clothes"
121
+ uri = uri_with_page(@@TOPS_Clothes_URI, page)
122
+ when "top_Tshirt"
123
+ uri = uri_with_page(@@TOPS_Tshirt_URI, page)
124
+ when "top_vest"
125
+ uri = uri_with_page(@@TOPS_Vest_URI, page)
126
+ when "top_blouse"
127
+ uri = uri_with_page(@@TOPS_Blouse_URI, page)
128
+ when "top_knit"
129
+ uri = uri_with_page(@@TOPS_Knit_URI, page)
130
+ when "bottom"
131
+ uri = uri_with_page(@@BOTTOM_URI, page)
132
+ when "bottom_pants"
133
+ uri = uri_with_page(@@BOTTOM_Pants_URI, page)
134
+ when "bottom_skirts"
135
+ uri = uri_with_page(@@BOTTOM_Skirts_URI, page)
136
+ when "outer"
137
+ uri = uri_with_page(@@OUTER_URI, page)
138
+ when "outer_coat"
139
+ uri = uri_with_page(@@OUTER_Coat_URI, page)
140
+ when "outer_jacket"
141
+ uri = uri_with_page(@@OUTER_Jacket_URI, page)
142
+ when "outer_knit"
143
+ uri = uri_with_page(@@OUTER_Knit_URI, page)
144
+ when "outer_vest"
145
+ uri = uri_with_page(@@OUTER_Vest_URI, page)
133
146
 
134
- if (keyword != "none") && (keyword != nil)
135
- uri = uri_with_keyword(uri,keyword)
147
+ when "dress"
148
+ uri = uri_with_page(@@DRESS_URI, page)
149
+ when "shoes_and_bag"
150
+ uri = uri_with_page(@@SHOES_AND_BAGS_URI, page)
151
+ when "shoes"
152
+ uri = uri_with_page(@@SHOES_URI, page)
153
+ when "bag"
154
+ uri = uri_with_page(@@BAG_URI, page)
155
+ when "accessories"
156
+ uri = uri_with_page(@@ACCESSORIES_URI, page)
157
+ when "accessories_watch"
158
+ uri = uri_with_page(@@ACCESSORIES_Watch_URI, page)
159
+ when "accessories_necklace"
160
+ uri = uri_with_page(@@ACCESSORIES_Necklace_URI, page)
161
+ when "accessories_ring"
162
+ uri = uri_with_page(@@ACCESSORIES_Ring_URI, page)
163
+ when "accessories_bracelet"
164
+ uri = uri_with_page(@@ACCESSORIES_Bracelet_URI, page)
165
+ when "accessories_earring"
166
+ uri = uri_with_page(@@ACCESSORIES_Earring_URI, page)
167
+ when "accessories_muffler"
168
+ uri = uri_with_page(@@ACCESSORIES_Muffler_URI, page)
169
+ when "accessories_belt"
170
+ uri = uri_with_page(@@ACCESSORIES_Belt_URI, page)
171
+ when "accessories_haircap"
172
+ uri = uri_with_page(@@ACCESSORIES_Haircap_URI, page)
173
+ when "accessories_glasses"
174
+ uri = uri_with_page(@@ACCESSORIES_Glasses_URI, page)
175
+ when "accessories_socks"
176
+ uri = uri_with_page(@@ACCESSORIES_Socks_URI, page)
177
+ when "accessories_underwear"
178
+ uri = uri_with_page(@@ACCESSORIES_Underwear_URI, page)
179
+ when "accessories_others"
180
+ uri = uri_with_page(@@ACCESSORIES_Others_URI, page)
181
+ else
182
+ uri = uri_with_page(@@ALL_ITEMS_URI, page)
183
+ end
184
+
185
+ if (keyword != "none") && (keyword != nil)
186
+ uri = uri_with_keyword(uri,keyword)
187
+ end
188
+ # puts uri
189
+ body = fetch_data(uri)
190
+ @filter_results = filter(body)
191
+
192
+ if @count==1
193
+ @combine_filter_results = @filter_results
194
+ @count +=1
195
+ else
196
+ if @filter_results.length>0
197
+ @combine_filter_results= @final_filter_results.concat(@filter_results)
198
+ end
199
+ end
136
200
  end
137
- puts uri
138
- body = fetch_data(uri)
139
- filter_results = filter(body)
201
+ @count=1
202
+
140
203
  #filter with price if there are correct price parameters
141
204
  if price_to!=nil && price_from!=nil && price_to.to_i >=price_from.to_i && price_from.to_i !=-1 && price_to.to_i !=-1
142
- return filter_results.select{|x| x[:price].to_i<=price_to.to_i && x[:price].to_i>=price_from.to_i }
205
+ return @combine_filter_results.select{|x| x[:price].to_i<=price_to.to_i && x[:price].to_i>=price_from.to_i }
143
206
  else
144
- return filter_results
207
+ return @combine_filter_results
145
208
  end
146
209
  end
147
210
 
@@ -156,7 +219,6 @@ module StyleMoonCat
156
219
  end
157
220
 
158
221
  def fetch_data(uri)
159
- # puts uri
160
222
  open(uri) {|file| file.read}
161
223
  end
162
224
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stylemooncat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Even Chang
@@ -11,10 +11,10 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2016-01-02 00:00:00.000000000 Z
14
+ date: 2016-01-03 00:00:00.000000000 Z
15
15
  dependencies: []
16
16
  description: This is a gem scraping StyleMoonCat's website.Input category name,page
17
- number,searcing keyword,and price range,and it will return the items with title,price,image,and
17
+ limit,searcing keyword,and price range,and it will return the items with title,price,image,and
18
18
  link in the page of the category
19
19
  email:
20
20
  - kiki44552002@gmail.com