queenshop 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/queenshop +6 -0
- data/lib/queenshop.rb +3 -0
- data/lib/queenshop/config.rb +58 -0
- data/lib/queenshop/scraper.rb +79 -0
- metadata +56 -0
    
        checksums.yaml
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            SHA1:
         | 
| 3 | 
            +
              metadata.gz: 4ccb94cb12167097b2f0d30b5602d911d37c4295
         | 
| 4 | 
            +
              data.tar.gz: 68c743d55718026e871bf2b041adfe2e2b2329df
         | 
| 5 | 
            +
            SHA512:
         | 
| 6 | 
            +
              metadata.gz: 721a7e7e251081cbdbe220481ffa5038b2bd4a80f6ae6e393207081b106730b1f91ee732dbaa779a9797c24f0ee3e025f0a2d0295aa8418d333c01694aa34df7
         | 
| 7 | 
            +
              data.tar.gz: 5a925e85811e0f6eeba1848e8a63d0f6604d59a98aaf220e899f9b1b6cf3f9a0b99bef679037c3c591153a94f982c265919c7510a46c40266467d112e6f28567
         | 
    
        data/bin/queenshop
    ADDED
    
    
    
        data/lib/queenshop.rb
    ADDED
    
    
| @@ -0,0 +1,58 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Mixin that parses and validates the `key=value` command-line
            # arguments understood by the queenshop executable.
            #
            # An including class gains:
            # * `validate_args(args)` - fills `@parameters` or aborts the process
            # * `parameters`          - the Hash built by `validate_args`
            # * `pages`               - the requested page numbers as an Array
            # * `usage`               - the help text printed on invalid usage
            module Validate
              attr_reader :parameters

              # Keys a caller may pass on the command line.
              VALID_ARGS = [:item, :price, :pages].freeze

              # A whole argument: everything before the first `=` is the key.
              # Anchored with \A/\z so multi-line input cannot slip through.
              ARG_PATTERN = /\A(?<key>[^=]*)=(?<value>.*)\z/

              # Per-key value syntax. Keys without an entry accept any value.
              #   price: a comparison operator followed by a number (">=12.5")
              #   pages: a single page ("3") or an inclusive range ("1..4")
              VALUE_PATTERNS = {
                price: /\A(>=|<=|==|>|<) ?\d+(\.\d+)?\z/,
                pages: /\A\d+(\.{2}\d+)?\z/
              }.freeze

              # Parses each `key=value` string in +args+ into `@parameters`,
              # starting from the defaults (no item, no price filter, pages 1..2).
              #
              # Aborts the process (SystemExit) with the usage text when an
              # argument has no `=` or names an unknown key, and with
              # "invalid parameters" when a value has the wrong syntax.
              #
              # @param args [Array<String>] raw arguments, e.g. ARGV
              def validate_args(args)
                @parameters = { item: '', price: '', pages: '1..2' }
                args.each do |arg|
                  match = ARG_PATTERN.match(arg.to_s)
                  # Compare as strings so arbitrary user input is never interned.
                  key = match && VALID_ARGS.find { |name| name.to_s == match[:key] }
                  abort "invalid usage...\n#{usage}\n\n" unless key

                  @parameters[key] = check(match)
                end
              end # end validate_args

              # Validates the value of one parsed argument.
              #
              # @param match [MatchData, Hash] responds to `[:key]` and `[:value]`
              # @return [String] the value, unchanged, when it is acceptable
              # Aborts the process with "invalid parameters" otherwise.
              def check(match)
                value = match[:value]
                pattern = VALUE_PATTERNS[match[:key].to_sym]
                abort 'invalid parameters' if pattern && pattern !~ value
                value
              rescue StandardError
                # Anything unexpected (e.g. a nil match) is treated as bad input.
                abort 'invalid parameters'
              end

              # Expands the `pages` parameter into the list of page numbers.
              # The first and last integers found in the parameter bound an
              # inclusive range; a single number yields a one-element list.
              #
              # @return [Array<Integer>]
              def pages
                bounds = @parameters[:pages].scan(/\d+/).map(&:to_i)
                @pages = (bounds.first.to_i..bounds.last.to_i).to_a
              end

              # Help text shown when the arguments cannot be parsed.
              # Operators are quoted in the examples because `<` and `>` are
              # shell redirections when left bare.
              #
              # @return [String]
              def usage
                [
                  'Usage: queenshop [options]',
                  '      item=(string)',
                  '      price=(operator)(number)   operator: > < >= <= ==',
                  '      pages=(first[..last])',
                  '      examples:',
                  '              queenshop item="blouse" price="<300"',
                  '              queenshop price=">=100" pages=1..3',
                  '              queenshop item="skirt"'
                ].join("\n")
              end
            end
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            # Holds the validated command-line configuration.
            # Construction validates +args+ through the Validate mixin (which
            # aborts the process on bad input) and then computes the page list.
            class QConfig
              include Validate

              # @param args [Array<String>] raw `key=value` arguments
              def initialize(args)
                validate_args(args)
                pages
              end
            end
         | 
| @@ -0,0 +1,79 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
            require 'oga'
         | 
| 3 | 
            +
            require 'open-uri'
         | 
| 4 | 
            +
            require_relative './config'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Scrapes product titles and prices from the Queenshop mobile site.
            module QueenShopScraper
              # Fetches listing pages, selects items with XPath and collects
              # `{ title:, price: }` hashes in `result`, optionally keeping only
              # the items whose price satisfies a filter such as ">300".
              class Filter
                attr_reader :result
                attr_writer :item_selector
                attr_writer :title_selector
                attr_writer :price_selector
                attr_writer :site_url

                # A price filter: comparison operator followed by a number.
                PRICE_FILTER_PATTERN = /\A(>=|<=|==|>|<)\s*(\d+(?:\.\d+)?)\z/

                # Whitelist mapping filter operators to the method that applies them.
                COMPARATORS = {
                  '>' => :>, '<' => :<, '>=' => :>=, '<=' => :<=, '==' => :==
                }.freeze

                def initialize
                  @result = []
                  # xml selectors that will be used to scrape data
                  @item_selector = "//div[@class=\'pditem\']/div[@class=\'pdicon\']"
                  @title_selector = "div[@class=\'pdicon_name\']/a"
                  @price_selector = "div[@class=\'pdicon_price\']/div[@style=\'font-weight:bold;\']"
                  @site_url = 'https://www.queenshop.com.tw/m/PDList2.asp?'
                  @price_filter = nil
                end

                # Scrapes every requested page and returns the collected items.
                # +params+ is combined with ARGV (without modifying the caller's
                # array) and validated by QConfig, which aborts on bad input.
                #
                # @param params [Array<String>] extra `key=value` arguments
                # @return [Array<Hash>] accumulated `{ title:, price: }` entries
                def scrape(params = [])
                  conf = QConfig.new(params + ARGV)
                  @price_filter = conf.parameters[:price]

                  conf.pages.each { |page| fetch_result("&page=#{page}") }
                  @result
                end

                private

                # Downloads +url+ and parses it as HTML.
                # Goes through URI#open (provided by open-uri) instead of
                # Kernel#open so the string can never be treated as a command,
                # and uses the block form so the handle is always closed.
                #
                # @return [Object, nil] the parsed document, or nil on any failure
                def get_xmldata(url)
                  URI.parse(url).open { |page| Oga.parse_html(page) }
                rescue StandardError
                  nil
                end

                # Fetches one page (site URL + +uri+) and feeds each item found
                # there to `strip_filter`.
                #
                # @return [Array] `[]` when the page could not be loaded,
                #   otherwise the accumulated results
                def fetch_result(uri = '')
                  document = get_xmldata(@site_url + uri)
                  # hard return on an error
                  return [] unless document

                  document.xpath(@item_selector).each do |item|
                    title = item.xpath(@title_selector).text.force_encoding('UTF-8')
                    price = item.xpath(@price_selector).text
                    strip_filter(title, price)
                  end
                  @result
                end

                # Strips the currency prefix from +price+ and records the item
                # unless it has no title or is rejected by the price filter.
                def strip_filter(title, price)
                  # Non-destructive gsub: gsub! returns nil when nothing matched,
                  # which used to discard prices that had no prefix.
                  price = price.to_s.gsub(/NT. /, '')
                  return if title.to_s.empty?
                  return unless price_selected?(price)

                  @result << { title: title.to_s, price: price }
                end

                # True when no filter is set or +price+ satisfies it.
                # The scraped text is parsed as a number and compared through a
                # whitelisted operator; it is never evaluated as Ruby code.
                #
                # @raise [ArgumentError] when the filter itself is malformed
                def price_selected?(price)
                  filter = @price_filter.to_s
                  return true if filter.empty?

                  parsed = PRICE_FILTER_PATTERN.match(filter)
                  raise ArgumentError, "invalid price filter: #{filter.inspect}" unless parsed

                  amount = numeric_price(price)
                  # Items whose price text is not a plain number cannot be compared.
                  return false unless amount

                  amount.public_send(COMPARATORS.fetch(parsed[1]), Float(parsed[2]))
                end

                # Converts scraped price text to a Float, or nil if it is not numeric.
                def numeric_price(text)
                  Float(text)
                rescue ArgumentError, TypeError
                  nil
                end
              end
            end
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,56 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification
         | 
| 2 | 
            +
            name: queenshop
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            +
              version: 0.0.1
         | 
| 5 | 
            +
            platform: ruby
         | 
| 6 | 
            +
            authors:
         | 
| 7 | 
            +
            - Even Chang
         | 
| 8 | 
            +
            - Luis Herrera
         | 
| 9 | 
            +
            - Katy Lee
         | 
| 10 | 
            +
            - Frank Lee
         | 
| 11 | 
            +
            autorequire: 
         | 
| 12 | 
            +
            bindir: bin
         | 
| 13 | 
            +
            cert_chain: []
         | 
| 14 | 
            +
            date: 2015-10-23 00:00:00.000000000 Z
         | 
| 15 | 
            +
            dependencies: []
         | 
| 16 | 
            +
            description: This is a gem scraping queenshop's website and returns the items with
         | 
| 17 | 
            +
              corresponding prices
         | 
| 18 | 
            +
            email:
         | 
| 19 | 
            +
            - kiki44552002@gmail.com
         | 
| 20 | 
            +
            - lmherrera86@gmail.com
         | 
| 21 | 
            +
            - katylee41024@yahoo.com.tw
         | 
| 22 | 
            +
            - frank1234211@gmail.com
         | 
| 23 | 
            +
            executables:
         | 
| 24 | 
            +
            - queenshop
         | 
| 25 | 
            +
            extensions: []
         | 
| 26 | 
            +
            extra_rdoc_files: []
         | 
| 27 | 
            +
            files:
         | 
| 28 | 
            +
            - bin/queenshop
         | 
| 29 | 
            +
            - lib/queenshop.rb
         | 
| 30 | 
            +
            - lib/queenshop/config.rb
         | 
| 31 | 
            +
            - lib/queenshop/scraper.rb
         | 
| 32 | 
            +
            homepage: http://rubygems.org/gems/pinkoi
         | 
| 33 | 
            +
            licenses:
         | 
| 34 | 
            +
            - MIT
         | 
| 35 | 
            +
            metadata: {}
         | 
| 36 | 
            +
            post_install_message: 
         | 
| 37 | 
            +
            rdoc_options: []
         | 
| 38 | 
            +
            require_paths:
         | 
| 39 | 
            +
            - lib
         | 
| 40 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 41 | 
            +
              requirements:
         | 
| 42 | 
            +
              - - ">="
         | 
| 43 | 
            +
                - !ruby/object:Gem::Version
         | 
| 44 | 
            +
                  version: '0'
         | 
| 45 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 46 | 
            +
              requirements:
         | 
| 47 | 
            +
              - - ">="
         | 
| 48 | 
            +
                - !ruby/object:Gem::Version
         | 
| 49 | 
            +
                  version: '0'
         | 
| 50 | 
            +
            requirements: []
         | 
| 51 | 
            +
            rubyforge_project: 
         | 
| 52 | 
            +
            rubygems_version: 2.5.0
         | 
| 53 | 
            +
            signing_key: 
         | 
| 54 | 
            +
            specification_version: 4
         | 
| 55 | 
            +
            summary: Scraper for Queenshop
         | 
| 56 | 
            +
            test_files: []
         |