instagram-crawler 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -2
- data/Dockerfile +7 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +19 -1
- data/README.md +32 -0
- data/bin/instagram-crawler +1 -2
- data/lib/instagram_crawler/config.rb +6 -1
- data/lib/instagram_crawler/file.rb +2 -1
- data/lib/instagram_crawler/parser/args.rb +2 -0
- data/lib/instagram_crawler/parser/html.rb +2 -1
- data/lib/instagram_crawler/parser/json.rb +3 -1
- data/lib/instagram_crawler/version.rb +1 -1
- metadata +3 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 5bbb0b173538ea3c0c43142545fe645f5651e59733f6286b40e97df437b66619
         | 
| 4 | 
            +
              data.tar.gz: 1626445eb2a4a8e64e64373c3fa4099d046c0d7fbf5c62057dfbe440d438ca59
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 4b48b5098fa06a70e85a0ccb6540aa428cc373f880b493be97f7ea3535547cd3ce2ce4f83605bc515010af1f9d1287729d03e6cfb3140461350d7fa9541b4e4e
         | 
| 7 | 
            +
              data.tar.gz: 67ee453b7b5308796236ae2f758043e64277fc738b45e8a206a012230079ebc329ec87867f4e216547745bb56aadbca848120f27c2458d555c33bd0e1c327273
         | 
    
        data/.travis.yml
    CHANGED
    
    
    
        data/Dockerfile
    ADDED
    
    
    
        data/Gemfile
    CHANGED
    
    
    
        data/Gemfile.lock
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            PATH
         | 
| 2 2 | 
             
              remote: .
         | 
| 3 3 | 
             
              specs:
         | 
| 4 | 
            -
                instagram-crawler (0.1.1)
         | 
| 4 | 
            +
                instagram-crawler (0.2.0)
         | 
| 5 5 | 
             
                  colorize (~> 0.8)
         | 
| 6 6 | 
             
                  http (~> 4.0)
         | 
| 7 7 | 
             
                  nokogiri (~> 1.8)
         | 
| @@ -12,7 +12,14 @@ GEM | |
| 12 12 | 
             
                addressable (2.5.2)
         | 
| 13 13 | 
             
                  public_suffix (>= 2.0.2, < 4.0)
         | 
| 14 14 | 
             
                colorize (0.8.1)
         | 
| 15 | 
            +
                coveralls (0.8.22)
         | 
| 16 | 
            +
                  json (>= 1.8, < 3)
         | 
| 17 | 
            +
                  simplecov (~> 0.16.1)
         | 
| 18 | 
            +
                  term-ansicolor (~> 1.3)
         | 
| 19 | 
            +
                  thor (~> 0.19.4)
         | 
| 20 | 
            +
                  tins (~> 1.6)
         | 
| 15 21 | 
             
                diff-lcs (1.3)
         | 
| 22 | 
            +
                docile (1.3.1)
         | 
| 16 23 | 
             
                domain_name (0.5.20180417)
         | 
| 17 24 | 
             
                  unf (>= 0.0.5, < 1.0.0)
         | 
| 18 25 | 
             
                http (4.0.0)
         | 
| @@ -24,6 +31,7 @@ GEM | |
| 24 31 | 
             
                  domain_name (~> 0.5)
         | 
| 25 32 | 
             
                http-form_data (2.1.1)
         | 
| 26 33 | 
             
                http_parser.rb (0.6.0)
         | 
| 34 | 
            +
                json (2.1.0)
         | 
| 27 35 | 
             
                mini_portile2 (2.3.0)
         | 
| 28 36 | 
             
                nokogiri (1.8.5)
         | 
| 29 37 | 
             
                  mini_portile2 (~> 2.3.0)
         | 
| @@ -42,6 +50,15 @@ GEM | |
| 42 50 | 
             
                  diff-lcs (>= 1.2.0, < 2.0)
         | 
| 43 51 | 
             
                  rspec-support (~> 3.8.0)
         | 
| 44 52 | 
             
                rspec-support (3.8.0)
         | 
| 53 | 
            +
                simplecov (0.16.1)
         | 
| 54 | 
            +
                  docile (~> 1.1)
         | 
| 55 | 
            +
                  json (>= 1.8, < 3)
         | 
| 56 | 
            +
                  simplecov-html (~> 0.10.0)
         | 
| 57 | 
            +
                simplecov-html (0.10.2)
         | 
| 58 | 
            +
                term-ansicolor (1.7.0)
         | 
| 59 | 
            +
                  tins (~> 1.0)
         | 
| 60 | 
            +
                thor (0.19.4)
         | 
| 61 | 
            +
                tins (1.20.2)
         | 
| 45 62 | 
             
                unf (0.1.4)
         | 
| 46 63 | 
             
                  unf_ext
         | 
| 47 64 | 
             
                unf_ext (0.0.7.5)
         | 
| @@ -51,6 +68,7 @@ PLATFORMS | |
| 51 68 |  | 
| 52 69 | 
             
            DEPENDENCIES
         | 
| 53 70 | 
             
              bundler (~> 1.17)
         | 
| 71 | 
            +
              coveralls
         | 
| 54 72 | 
             
              instagram-crawler!
         | 
| 55 73 | 
             
              rake (~> 10.0)
         | 
| 56 74 | 
             
              rspec (~> 3.0)
         | 
    
        data/README.md
    CHANGED
    
    | @@ -1,5 +1,13 @@ | |
| 1 1 | 
             
            # Instagram Crawler
         | 
| 2 2 |  | 
| 3 | 
            +
            [](https://badge.fury.io/rb/instagram-crawler)
         | 
| 4 | 
            +
            [](https://codeclimate.com/github/mgleon08/instagram-crawler/maintainability)
         | 
| 5 | 
            +
            [](https://travis-ci.org/mgleon08/instagram-crawler)
         | 
| 6 | 
            +
            [](https://coveralls.io/github/mgleon08/instagram-crawler?branch=master)
         | 
| 7 | 
            +
            [](https://hakiri.io/github/mgleon08/instagram-crawler/master)
         | 
| 8 | 
            +
            [](https://github.com/mgleon08/instagram-crawler/blob/master/LICENSE.txt)
         | 
| 9 | 
            +
             | 
| 10 | 
            +
             | 
| 3 11 | 
             
            > The easiest way to download instagram photos, posts and videos.
         | 
| 4 12 |  | 
| 5 13 | 
             
            <img src="screenshots/logo.png" width="200" align="center">
         | 
| @@ -48,6 +56,14 @@ instagram-crawler -u <user_name> -d -a 20181120 | |
| 48 56 | 
             
            instagram-crawler -u <user_name> -l
         | 
| 49 57 | 
             
            ```
         | 
| 50 58 |  | 
| 59 | 
            +
            ### Proxy
         | 
| 60 | 
            +
             | 
| 61 | 
            +
            `-P || --proxyname ` `-p || --port`
         | 
| 62 | 
            +
             | 
| 63 | 
            +
            ```ruby
         | 
| 64 | 
            +
            instagram-crawler -u <user_name> -P http://example.com -p 1234
         | 
| 65 | 
            +
            ```
         | 
| 66 | 
            +
             | 
| 51 67 | 
             
            ### Help
         | 
| 52 68 |  | 
| 53 69 | 
             
            `instagram-crawler -h | --help`
         | 
| @@ -62,10 +78,26 @@ options: | |
| 62 78 | 
             
                -d, --download                   Download files
         | 
| 63 79 | 
             
                -a, --after DATE                 Download files after this date (YYYYMMDD)
         | 
| 64 80 | 
             
                -l, --log                        Generate a log file in the current directory
         | 
| 81 | 
            +
                -P, --proxyname PROXYNAME        Specify proxyname of your proxy server
         | 
| 82 | 
            +
                -p, --port PORT                  Specify port of your proxy server (default port: 8080)
         | 
| 65 83 | 
             
                -v, --version                    Show the instagram-crawler version
         | 
| 66 84 | 
             
                -h, --help                       Show this message
         | 
| 67 85 | 
             
            ```
         | 
| 68 86 |  | 
| 87 | 
            +
            ## Docker
         | 
| 88 | 
            +
             | 
| 89 | 
            +
            ```docker
         | 
| 90 | 
            +
            # make sure already setting env variable
         | 
| 91 | 
            +
            # you can setting sessionid in local use $sessionid or pass sessionid to docker
         | 
| 92 | 
            +
            # $PWD/instagram-crawler is file store path
         | 
| 93 | 
            +
             | 
| 94 | 
            +
            # pull image
         | 
| 95 | 
            +
            docker pull mgleon08/instagram-crawler
         | 
| 96 | 
            +
             | 
| 97 | 
            +
            # docker run
         | 
| 98 | 
            +
            docker run -it --rm -v $PWD/instagram-crawler:/instagram-crawler -e sessionid=$sessionid --name marvel mgleon08/instagram-crawler -u marvel -a 20181124 -d -l
         | 
| 99 | 
            +
            ```
         | 
| 100 | 
            +
             | 
| 69 101 | 
             
            ## Contributing
         | 
| 70 102 |  | 
| 71 103 | 
             
            Bug reports and pull requests are welcome on GitHub at [`https://github.com/mgleon08/instagram-crawler/pulls`](https://github.com/mgleon08/instagram-crawler/pulls)
         | 
    
        data/bin/instagram-crawler
    CHANGED
    
    | @@ -1,9 +1,8 @@ | |
| 1 1 | 
             
            #!/usr/bin/env ruby
         | 
| 2 2 | 
             
            require_relative '../lib/instagram_crawler'
         | 
| 3 | 
            -
             | 
| 4 3 | 
             
            begin
         | 
| 5 | 
            -
              raise InstagramCrawler::Errors::EnvError if ENV["sessionid"].nil?
         | 
| 6 4 | 
             
              args = InstagramCrawler::Parser::Args.new(ARGV)
         | 
| 5 | 
            +
              raise InstagramCrawler::Errors::EnvError if ENV["sessionid"].nil?
         | 
| 7 6 | 
             
              InstagramCrawler::Logger.setting(args.log)
         | 
| 8 7 | 
             
              InstagramCrawler::Main.run
         | 
| 9 8 | 
             
            rescue => e
         | 
    
        data/lib/instagram_crawler/config.rb
    CHANGED
    
    | @@ -4,7 +4,8 @@ module InstagramCrawler | |
| 4 4 | 
             
                class << self
         | 
| 5 5 | 
             
                  attr_reader :default_url, :user_name, :base_url, :base_path,
         | 
| 6 6 | 
             
                              :log_path, :after_date, :parse_date
         | 
| 7 | 
            -
                  attr_accessor :download
         | 
| 7 | 
            +
                  attr_accessor :download, :proxyname
         | 
| 8 | 
            +
                  attr_writer :port
         | 
| 8 9 |  | 
| 9 10 | 
             
                  def user_name=(user_name)
         | 
| 10 11 | 
             
                    @user_name = user_name
         | 
| @@ -17,6 +18,10 @@ module InstagramCrawler | |
| 17 18 | 
             
                    @after_date = after_date
         | 
| 18 19 | 
             
                    @parse_date = Time.parse(after_date).to_i
         | 
| 19 20 | 
             
                  end
         | 
| 21 | 
            +
             | 
| 22 | 
            +
                  def port
         | 
| 23 | 
            +
                    @port ? @port.to_i : 8080
         | 
| 24 | 
            +
                  end
         | 
| 20 25 | 
             
                end
         | 
| 21 26 | 
             
              end
         | 
| 22 27 | 
             
            end
         | 
    
        data/lib/instagram_crawler/file.rb
    CHANGED
    
    | @@ -25,7 +25,8 @@ module InstagramCrawler | |
| 25 25 | 
             
                  private
         | 
| 26 26 |  | 
| 27 27 | 
             
                  def get_binary_data(url)
         | 
| 28 | 
            -
                    res =  | 
| 28 | 
            +
                    res = Config.proxyname ?
         | 
| 29 | 
            +
                      HTTP.via(Config.proxyname, Config.port).get(url) : HTTP.get(url)
         | 
| 29 30 | 
             
                    raise Errors::HttpError, "#{res.code} #{res.reason}" if res.code != 200
         | 
| 30 31 | 
             
                    res.to_s
         | 
| 31 32 | 
             
                  end
         | 
    
        data/lib/instagram_crawler/parser/args.rb
    CHANGED
    
    | @@ -20,6 +20,8 @@ module InstagramCrawler | |
| 20 20 | 
             
                    opts.on('-d', '--download', 'Download files') { |download| Config.download = true }
         | 
| 21 21 | 
             
                    opts.on('-a', '--after DATE', 'Download files after this date (YYYYMMDD)') { |after_date| Config.after_date = after_date }
         | 
| 22 22 | 
             
                    opts.on('-l', '--log', 'Generate a log file in the current directory') { self.log = true }
         | 
| 23 | 
            +
                    opts.on('-P', '--proxyname PROXYNAME', 'Specify proxyname of your proxy server') { |proxyname| Config.proxyname = proxyname }
         | 
| 24 | 
            +
                    opts.on('-p', '--port PORT', 'Specify port of your proxy server (default port: 8080)') { |port| Config.port = port }
         | 
| 23 25 | 
             
                    opts.on('-v', '--version', 'Show the instagram-crawler version') { puts("instagram-crawler #{InstagramCrawler::VERSION}"); exit }
         | 
| 24 26 | 
             
                    opts.on('-h', '--help', 'Show this message') { puts(opts); exit }
         | 
| 25 27 | 
             
                    opts.parse!(@args)
         | 
    
        data/lib/instagram_crawler/parser/html.rb
    CHANGED
    
    | @@ -70,7 +70,8 @@ module InstagramCrawler | |
| 70 70 | 
             
                  end
         | 
| 71 71 |  | 
| 72 72 | 
             
                  def get_html(url)
         | 
| 73 | 
            -
                    res =  | 
| 73 | 
            +
                    res = Config.proxyname ?
         | 
| 74 | 
            +
                      HTTP.via(Config.proxyname, Config.port).get(url) : HTTP.get(url)
         | 
| 74 75 | 
             
                    raise Errors::HttpError, "#{res.code} #{res.reason}" if res.code != 200
         | 
| 75 76 | 
             
                    res.to_s
         | 
| 76 77 | 
             
                  end
         | 
    
        data/lib/instagram_crawler/parser/json.rb
    CHANGED
    
    | @@ -47,7 +47,9 @@ module InstagramCrawler | |
| 47 47 | 
             
                  end
         | 
| 48 48 |  | 
| 49 49 | 
             
                  def get_json(url)
         | 
| 50 | 
            -
                     | 
| 50 | 
            +
                    http = HTTP.cookies(sessionid: ENV["sessionid"])
         | 
| 51 | 
            +
                    res = Config.proxyname ?
         | 
| 52 | 
            +
                      http.via(Config.proxyname, Config.port).get(url) : http.get(url)
         | 
| 51 53 | 
             
                    raise Errors::HttpError, "#{res.code} #{res.reason}" if res.code != 200
         | 
| 52 54 | 
             
                    res.to_s
         | 
| 53 55 | 
             
                  end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: instagram-crawler
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1.1
         | 
| 4 | 
            +
              version: 0.2.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Leon Ji
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2018- | 
| 11 | 
            +
            date: 2018-12-18 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: bundler
         | 
| @@ -105,6 +105,7 @@ files: | |
| 105 105 | 
             
            - ".gitignore"
         | 
| 106 106 | 
             
            - ".rspec"
         | 
| 107 107 | 
             
            - ".travis.yml"
         | 
| 108 | 
            +
            - Dockerfile
         | 
| 108 109 | 
             
            - Gemfile
         | 
| 109 110 | 
             
            - Gemfile.lock
         | 
| 110 111 | 
             
            - LICENSE.txt
         |