shopee 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/Gemfile +1 -0
 - data/README.md +7 -7
 - data/lib/data/mobile_category.rb +1 -0
 - data/lib/shopee.rb +70 -30
 - data/lib/shopee/version.rb +2 -2
 - metadata +3 -3
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 0dea88ac985e2a00925725ba22d00524f20f5686
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: cc396b07bf301732655b22c836f007f3e076cb52
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 5960417b9ea1160df982eba61c4f10389d2afed77f278b038a820af1bf45570f9221ac7d8fe0e959c5b3843e52bc487602b40b115eeceef57db0a953fcfc356a
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 6024ea6d9e1aac0d2d72c32ececa5592e0f607e3509646da1a4e9edfe4dbd4511b8fe0835d6b8d1c9d93a02fa426543b46d94dff8612aa909822f33dfdaf9fb4
         
     | 
    
        data/Gemfile
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | 
         @@ -20,11 +20,11 @@ $ gem install shopee 
     | 
|
| 
       20 
20 
     | 
    
         
             
            ## 1. List all categories of the Mobile01 market
         
     | 
| 
       21 
21 
     | 
    
         
             
            $ shopee allcate
         
     | 
| 
       22 
22 
     | 
    
         | 
| 
       23 
     | 
    
         
            -
            ## 2. List  
     | 
| 
      
 23 
     | 
    
         
            +
            ## 2. List goods by a category
         
     | 
| 
       24 
24 
     | 
    
         
             
            $ shopee list [category_name]
         
     | 
| 
       25 
25 
     | 
    
         | 
| 
       26 
     | 
    
         
            -
            ## 3. Find top  
     | 
| 
       27 
     | 
    
         
            -
            $ shopee search [category_name] [keyword]
         
     | 
| 
      
 26 
     | 
    
         
            +
            ## 3. Find the top [num] goods in a category matching a keyword
         
     | 
| 
      
 27 
     | 
    
         
            +
            $ shopee search [category_name] [keyword] [num]
         
     | 
| 
       28 
28 
     | 
    
         | 
| 
       29 
29 
     | 
    
         
             
            ```
         
     | 
| 
       30 
30 
     | 
    
         | 
| 
         @@ -186,7 +186,7 @@ GPS 
     | 
|
| 
       186 
186 
     | 
    
         
             
            ```
         
     | 
| 
       187 
187 
     | 
    
         | 
| 
       188 
188 
     | 
    
         | 
| 
       189 
     | 
    
         
            -
            **2. List  
     | 
| 
      
 189 
     | 
    
         
            +
            **2. List goods by a category**
         
     | 
| 
       190 
190 
     | 
    
         
             
            ```ruby
         
     | 
| 
       191 
191 
     | 
    
         
             
            require 'shopee'
         
     | 
| 
       192 
192 
     | 
    
         
             
            ##Example :category is '電腦資訊'
         
     | 
| 
         @@ -231,13 +231,13 @@ puts shopeecate.goods 
     | 
|
| 
       231 
231 
     | 
    
         
             
            {"name"=>"全新未拆 羅技MX Master  fervidity (5) 商品所在地:台中市 (可交換商品)", "price"=>"9,500元  10,000元", "num"=>"0", "update_time"=>"2015-11-15"}
         
     | 
| 
       232 
232 
     | 
    
         
             
            {"name"=>"賣 99.99 新Samsung Galaxy Tab S 8.4 白3G/16G LTE ( 可通話平...  星空下的雨 (31) 商品所在地:新北市 (可交換商品)", "price"=>nil, "num"=>nil, "update_time"=>nil}
         
     | 
| 
       233 
233 
     | 
    
         
             
            ```
         
     | 
| 
       234 
     | 
    
         
            -
            **3. Find top  
     | 
| 
      
 234 
     | 
    
         
            +
            **3. Find the top num goods in a category matching a keyword**
         
     | 
| 
       235 
235 
     | 
    
         
             
            ```ruby
         
     | 
| 
       236 
236 
     | 
    
         
             
            require 'shopee'
         
     | 
| 
       237 
237 
     | 
    
         
             
            ##Example :category is '電腦資訊'; keyword is 'samsung'
         
     | 
| 
       238 
     | 
    
         
            -
             
     | 
| 
      
 238 
     | 
    
         
            +
            shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new(category)
         
     | 
| 
       239 
239 
     | 
    
         
             
            goods = shopeecate.goods
         
     | 
| 
       240 
     | 
    
         
            -
            puts shopeecate.search_keyword(goods, keyword)
         
     | 
| 
      
 240 
     | 
    
         
            +
            puts shopeecate.search_keyword(goods, keyword, num)
         
     | 
| 
       241 
241 
     | 
    
         
             
            ```
         
     | 
| 
       242 
242 
     | 
    
         
             
            - Expected output
         
     | 
| 
       243 
243 
     | 
    
         
             
            ```
         
     | 
    
        data/lib/data/mobile_category.rb
    CHANGED
    
    | 
         @@ -140,3 +140,4 @@ ALL_LINK = { "電腦資訊"=>"http://www.mobile01.com/mpcatlist.php?c=1", 
     | 
|
| 
       140 
140 
     | 
    
         
             
              "南部地區"=>"http://www.mobile01.com/mpcatlist.php?c=90",
         
     | 
| 
       141 
141 
     | 
    
         
             
              "東部與外島地區"=>"http://www.mobile01.com/mpcatlist.php?c=91"
         
     | 
| 
       142 
142 
     | 
    
         
             
            }
         
     | 
| 
      
 143 
     | 
    
         
            +
            CATEGORY_LIST = ["電腦資訊", "手持通訊", "攝影器材", "數位家電", "休閒旅遊", "生活用品", "汽車", "機車", "自行車", "男性時尚", "女性流行", "代購與虛擬物品", "房屋地產"]
         
     | 
    
        data/lib/shopee.rb
    CHANGED
    
    | 
         @@ -7,6 +7,7 @@ module ShopeeScrape 
     | 
|
| 
       7 
7 
     | 
    
         
             
              class ShopeeListGoodsByCate
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
9 
     | 
    
         
             
                require_relative './data/mobile_category'
         
     | 
| 
      
 10 
     | 
    
         
            +
                GOOD_INFO = "//div[contains(@class, 'img')]"
         
     | 
| 
       10 
11 
     | 
    
         
             
                GOOD_NAME = "//div[contains(@class, 'subject')]"
         
     | 
| 
       11 
12 
     | 
    
         
             
                GOOD_PRICE = "//div[contains(@class, 'price')]"
         
     | 
| 
       12 
13 
     | 
    
         
             
                GOOD_NUM = "//div[contains(@class, 'num')]"
         
     | 
| 
         @@ -27,46 +28,85 @@ module ShopeeScrape 
     | 
|
| 
       27 
28 
     | 
    
         
             
                private
         
     | 
| 
       28 
29 
     | 
    
         | 
| 
       29 
30 
     | 
    
         
             
                def parse_html(id)
         
     | 
| 
      
 31 
     | 
    
         
            +
                  @document = []
         
     | 
| 
       30 
32 
     | 
    
         
             
                  url = ALL_LINK[id]
         
     | 
| 
       31 
     | 
    
         
            -
                  @document  
     | 
| 
       32 
     | 
    
         
            -
                end
         
     | 
| 
      
 33 
     | 
    
         
            +
                  @document << Oga.parse_html(open(url))
         
     | 
| 
       33 
34 
     | 
    
         | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
       35 
     | 
    
         
            -
                   
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
       37 
     | 
    
         
            -
                  num = []
         
     | 
| 
       38 
     | 
    
         
            -
                  update_time = []
         
     | 
| 
       39 
     | 
    
         
            -
                  @document.xpath(GOOD_NAME).map do |good|
         
     | 
| 
       40 
     | 
    
         
            -
                    name << good.text
         
     | 
| 
      
 35 
     | 
    
         
            +
                  page_num = 2
         
     | 
| 
      
 36 
     | 
    
         
            +
                  if CATEGORY_LIST.include?(id) == true
         
     | 
| 
      
 37 
     | 
    
         
            +
                    page_num = 5
         
     | 
| 
       41 
38 
     | 
    
         
             
                  end
         
     | 
| 
       42 
39 
     | 
    
         | 
| 
       43 
     | 
    
         
            -
                   
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
      
 40 
     | 
    
         
            +
                  check = 1
         
     | 
| 
      
 41 
     | 
    
         
            +
                  i = 1
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
                  while check == 1 && i < page_num
         
     | 
| 
      
 44 
     | 
    
         
            +
                    i += 1
         
     | 
| 
      
 45 
     | 
    
         
            +
                    begin
         
     | 
| 
      
 46 
     | 
    
         
            +
                      url_t = url +'&p='+i.to_s
         
     | 
| 
      
 47 
     | 
    
         
            +
                      open url_t, :proxy=>true
         
     | 
| 
      
 48 
     | 
    
         
            +
                      @document << Oga.parse_html(open(url_t))
         
     | 
| 
      
 49 
     | 
    
         
            +
                    rescue
         
     | 
| 
      
 50 
     | 
    
         
            +
                      check = 0
         
     | 
| 
      
 51 
     | 
    
         
            +
                    end
         
     | 
| 
       45 
52 
     | 
    
         
             
                  end
         
     | 
| 
       46 
53 
     | 
    
         | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
                    num << good.text
         
     | 
| 
       49 
     | 
    
         
            -
                  end
         
     | 
| 
      
 54 
     | 
    
         
            +
                end
         
     | 
| 
       50 
55 
     | 
    
         | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
                    update_time << good.text
         
     | 
| 
       53 
     | 
    
         
            -
                  end
         
     | 
| 
      
 56 
     | 
    
         
            +
                def extract_goods
         
     | 
| 
       54 
57 
     | 
    
         | 
| 
       55 
     | 
    
         
            -
                  number = name.length
         
     | 
| 
       56 
58 
     | 
    
         
             
                  results = []
         
     | 
| 
       57 
     | 
    
         
            -
                  # puts number
         
     | 
| 
       58 
     | 
    
         
            -
                  if number > 32
         
     | 
| 
       59 
     | 
    
         
            -
                    number = 32
         
     | 
| 
       60 
     | 
    
         
            -
                  end
         
     | 
| 
       61 
     | 
    
         
            -
                  for i in 2..number-1
         
     | 
| 
       62 
     | 
    
         
            -
                    element = {}
         
     | 
| 
       63 
     | 
    
         
            -
                    element['name'] = name[i]
         
     | 
| 
       64 
     | 
    
         
            -
                    element['price'] = price[i]
         
     | 
| 
       65 
     | 
    
         
            -
                    element['num'] = num[i]
         
     | 
| 
       66 
     | 
    
         
            -
                    element['update_time'] = update_time[i]
         
     | 
| 
       67 
     | 
    
         
            -
                    results << element
         
     | 
| 
       68 
     | 
    
         
            -
                  end
         
     | 
| 
       69 
59 
     | 
    
         | 
| 
      
 60 
     | 
    
         
            +
                  @document.each do |doc|
         
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
      
 62 
     | 
    
         
            +
                    name = []
         
     | 
| 
      
 63 
     | 
    
         
            +
                    price = []
         
     | 
| 
      
 64 
     | 
    
         
            +
                    num = []
         
     | 
| 
      
 65 
     | 
    
         
            +
                    update_time = []
         
     | 
| 
      
 66 
     | 
    
         
            +
                    pic = []
         
     | 
| 
      
 67 
     | 
    
         
            +
                    link = []
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
                    doc.xpath(GOOD_NAME).map do |good|
         
     | 
| 
      
 70 
     | 
    
         
            +
                      name << good.text
         
     | 
| 
      
 71 
     | 
    
         
            +
                    end
         
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
      
 73 
     | 
    
         
            +
                    doc.xpath(GOOD_PRICE).map do |good|
         
     | 
| 
      
 74 
     | 
    
         
            +
                      price << good.text
         
     | 
| 
      
 75 
     | 
    
         
            +
                    end
         
     | 
| 
      
 76 
     | 
    
         
            +
             
     | 
| 
      
 77 
     | 
    
         
            +
                    doc.xpath(GOOD_NUM).map do |good|
         
     | 
| 
      
 78 
     | 
    
         
            +
                      num << good.text
         
     | 
| 
      
 79 
     | 
    
         
            +
                    end
         
     | 
| 
      
 80 
     | 
    
         
            +
             
     | 
| 
      
 81 
     | 
    
         
            +
                    doc.xpath(GOOD_UPTIME).map do |good|
         
     | 
| 
      
 82 
     | 
    
         
            +
                      update_time << good.text
         
     | 
| 
      
 83 
     | 
    
         
            +
                    end
         
     | 
| 
      
 84 
     | 
    
         
            +
             
     | 
| 
      
 85 
     | 
    
         
            +
                    doc.xpath(GOOD_INFO).map do |good|
         
     | 
| 
      
 86 
     | 
    
         
            +
                      link << good.css('a').attribute('href')[0].to_s
         
     | 
| 
      
 87 
     | 
    
         
            +
                      # puts good.css('img').attribute('src')[0].to_s
         
     | 
| 
      
 88 
     | 
    
         
            +
                      pic << good.css('img').attribute('src')[0].to_s
         
     | 
| 
      
 89 
     | 
    
         
            +
                    end
         
     | 
| 
      
 90 
     | 
    
         
            +
             
     | 
| 
      
 91 
     | 
    
         
            +
                    number = name.length
         
     | 
| 
      
 92 
     | 
    
         
            +
             
     | 
| 
      
 93 
     | 
    
         
            +
                    # puts number
         
     | 
| 
      
 94 
     | 
    
         
            +
                    if number > 32
         
     | 
| 
      
 95 
     | 
    
         
            +
                      number = 32
         
     | 
| 
      
 96 
     | 
    
         
            +
                    end
         
     | 
| 
      
 97 
     | 
    
         
            +
                    for i in 2..number-1
         
     | 
| 
      
 98 
     | 
    
         
            +
                      element = {}
         
     | 
| 
      
 99 
     | 
    
         
            +
                      element['name'] = name[i]
         
     | 
| 
      
 100 
     | 
    
         
            +
                      element['price'] = price[i]
         
     | 
| 
      
 101 
     | 
    
         
            +
                      element['num'] = num[i]
         
     | 
| 
      
 102 
     | 
    
         
            +
                      element['update_time'] = update_time[i]
         
     | 
| 
      
 103 
     | 
    
         
            +
                      element['link'] = link[i]
         
     | 
| 
      
 104 
     | 
    
         
            +
                      element['pic'] = pic[i]
         
     | 
| 
      
 105 
     | 
    
         
            +
                      results << element
         
     | 
| 
      
 106 
     | 
    
         
            +
                    end
         
     | 
| 
      
 107 
     | 
    
         
            +
             
     | 
| 
      
 108 
     | 
    
         
            +
                  end
         
     | 
| 
      
 109 
     | 
    
         
            +
                  puts results.length
         
     | 
| 
       70 
110 
     | 
    
         
             
                  results
         
     | 
| 
       71 
111 
     | 
    
         
             
                end
         
     | 
| 
       72 
112 
     | 
    
         | 
    
        data/lib/shopee/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: shopee
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0. 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.2.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Sheng Jung Wu
         
     | 
| 
         @@ -11,7 +11,7 @@ authors: 
     | 
|
| 
       11 
11 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       12 
12 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       13 
13 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       14 
     | 
    
         
            -
            date:  
     | 
| 
      
 14 
     | 
    
         
            +
            date: 2016-01-09 00:00:00.000000000 Z
         
     | 
| 
       15 
15 
     | 
    
         
             
            dependencies:
         
     | 
| 
       16 
16 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       17 
17 
     | 
    
         
             
              name: minitest
         
     | 
| 
         @@ -125,7 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       125 
125 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       126 
126 
     | 
    
         
             
            requirements: []
         
     | 
| 
       127 
127 
     | 
    
         
             
            rubyforge_project: 
         
     | 
| 
       128 
     | 
    
         
            -
            rubygems_version: 2.4. 
     | 
| 
      
 128 
     | 
    
         
            +
            rubygems_version: 2.4.6
         
     | 
| 
       129 
129 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       130 
130 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       131 
131 
     | 
    
         
             
            summary: Scrape categories of mobile01!
         
     |