instagram-crawler 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -2
- data/Dockerfile +7 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +19 -1
- data/README.md +32 -0
- data/bin/instagram-crawler +1 -2
- data/lib/instagram_crawler/config.rb +6 -1
- data/lib/instagram_crawler/file.rb +2 -1
- data/lib/instagram_crawler/parser/args.rb +2 -0
- data/lib/instagram_crawler/parser/html.rb +2 -1
- data/lib/instagram_crawler/parser/json.rb +3 -1
- data/lib/instagram_crawler/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5bbb0b173538ea3c0c43142545fe645f5651e59733f6286b40e97df437b66619
|
4
|
+
data.tar.gz: 1626445eb2a4a8e64e64373c3fa4099d046c0d7fbf5c62057dfbe440d438ca59
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b48b5098fa06a70e85a0ccb6540aa428cc373f880b493be97f7ea3535547cd3ce2ce4f83605bc515010af1f9d1287729d03e6cfb3140461350d7fa9541b4e4e
|
7
|
+
data.tar.gz: 67ee453b7b5308796236ae2f758043e64277fc738b45e8a206a012230079ebc329ec87867f4e216547745bb56aadbca848120f27c2458d555c33bd0e1c327273
|
data/.travis.yml
CHANGED
data/Dockerfile
ADDED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
instagram-crawler (0.1.1)
|
4
|
+
instagram-crawler (0.2.0)
|
5
5
|
colorize (~> 0.8)
|
6
6
|
http (~> 4.0)
|
7
7
|
nokogiri (~> 1.8)
|
@@ -12,7 +12,14 @@ GEM
|
|
12
12
|
addressable (2.5.2)
|
13
13
|
public_suffix (>= 2.0.2, < 4.0)
|
14
14
|
colorize (0.8.1)
|
15
|
+
coveralls (0.8.22)
|
16
|
+
json (>= 1.8, < 3)
|
17
|
+
simplecov (~> 0.16.1)
|
18
|
+
term-ansicolor (~> 1.3)
|
19
|
+
thor (~> 0.19.4)
|
20
|
+
tins (~> 1.6)
|
15
21
|
diff-lcs (1.3)
|
22
|
+
docile (1.3.1)
|
16
23
|
domain_name (0.5.20180417)
|
17
24
|
unf (>= 0.0.5, < 1.0.0)
|
18
25
|
http (4.0.0)
|
@@ -24,6 +31,7 @@ GEM
|
|
24
31
|
domain_name (~> 0.5)
|
25
32
|
http-form_data (2.1.1)
|
26
33
|
http_parser.rb (0.6.0)
|
34
|
+
json (2.1.0)
|
27
35
|
mini_portile2 (2.3.0)
|
28
36
|
nokogiri (1.8.5)
|
29
37
|
mini_portile2 (~> 2.3.0)
|
@@ -42,6 +50,15 @@ GEM
|
|
42
50
|
diff-lcs (>= 1.2.0, < 2.0)
|
43
51
|
rspec-support (~> 3.8.0)
|
44
52
|
rspec-support (3.8.0)
|
53
|
+
simplecov (0.16.1)
|
54
|
+
docile (~> 1.1)
|
55
|
+
json (>= 1.8, < 3)
|
56
|
+
simplecov-html (~> 0.10.0)
|
57
|
+
simplecov-html (0.10.2)
|
58
|
+
term-ansicolor (1.7.0)
|
59
|
+
tins (~> 1.0)
|
60
|
+
thor (0.19.4)
|
61
|
+
tins (1.20.2)
|
45
62
|
unf (0.1.4)
|
46
63
|
unf_ext
|
47
64
|
unf_ext (0.0.7.5)
|
@@ -51,6 +68,7 @@ PLATFORMS
|
|
51
68
|
|
52
69
|
DEPENDENCIES
|
53
70
|
bundler (~> 1.17)
|
71
|
+
coveralls
|
54
72
|
instagram-crawler!
|
55
73
|
rake (~> 10.0)
|
56
74
|
rspec (~> 3.0)
|
data/README.md
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
# Instagram Crawler
|
2
2
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/instagram-crawler.svg)](https://badge.fury.io/rb/instagram-crawler)
|
4
|
+
[![Maintainability](https://api.codeclimate.com/v1/badges/a1625a5a812f515bdd91/maintainability)](https://codeclimate.com/github/mgleon08/instagram-crawler/maintainability)
|
5
|
+
[![Build Status](https://travis-ci.org/mgleon08/instagram-crawler.svg?branch=master)](https://travis-ci.org/mgleon08/instagram-crawler)
|
6
|
+
[![Coverage Status](https://coveralls.io/repos/github/mgleon08/instagram-crawler/badge.svg?branch=master)](https://coveralls.io/github/mgleon08/instagram-crawler?branch=master)
|
7
|
+
[![security](https://hakiri.io/github/mgleon08/instagram-crawler/master.svg)](https://hakiri.io/github/mgleon08/instagram-crawler/master)
|
8
|
+
[![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/mgleon08/instagram-crawler/blob/master/LICENSE.txt)
|
9
|
+
|
10
|
+
|
3
11
|
> The easiest way to download instagram photos, posts and videos.
|
4
12
|
|
5
13
|
<img src="screenshots/logo.png" width="200" align="center">
|
@@ -48,6 +56,14 @@ instagram-crawler -u <user_name> -d -a 20181120
|
|
48
56
|
instagram-crawler -u <user_name> -l
|
49
57
|
```
|
50
58
|
|
59
|
+
### Proxy
|
60
|
+
|
61
|
+
`-P || --proxyname ` `-p || --port`
|
62
|
+
|
63
|
+
```ruby
|
64
|
+
instagram-crawler -u <user_name> -P http://example.com -p 1234
|
65
|
+
```
|
66
|
+
|
51
67
|
### Help
|
52
68
|
|
53
69
|
`instagram-crawler -h | --help`
|
@@ -62,10 +78,26 @@ options:
|
|
62
78
|
-d, --download Download files
|
63
79
|
-a, --after DATE Download files after this date (YYYYMMDD)
|
64
80
|
-l, --log Generate a log file in the current directory
|
81
|
+
-P, --proxyname PROXYNAME Specify proxyname of your proxy server
|
82
|
+
-p, --port PORT Specify port of your proxy server (default port: 8080)
|
65
83
|
-v, --version Show the instagram-crawler version
|
66
84
|
-h, --help Show this message
|
67
85
|
```
|
68
86
|
|
87
|
+
## Docker
|
88
|
+
|
89
|
+
```docker
|
90
|
+
# make sure already setting env variable
|
91
|
+
# you can setting sessionid in local use $sessionid or pass sessionid to docker
|
92
|
+
# $PWD/instagram-crawler is file store path
|
93
|
+
|
94
|
+
# pull image
|
95
|
+
docker pull mgleon08/instagram-crawler
|
96
|
+
|
97
|
+
# docker run
|
98
|
+
docker run -it --rm -v $PWD/instagram-crawler:/instagram-crawler -e sessionid=$sessionid --name marvel mgleon08/instagram-crawler -u marvel -a 20181124 -d -l
|
99
|
+
```
|
100
|
+
|
69
101
|
## Contributing
|
70
102
|
|
71
103
|
Bug reports and pull requests are welcome on GitHub at [`https://github.com/mgleon08/instagram-crawler/pulls`](https://github.com/mgleon08/instagram-crawler/pulls)
|
data/bin/instagram-crawler
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require_relative '../lib/instagram_crawler'
|
3
|
-
|
4
3
|
begin
|
5
|
-
raise InstagramCrawler::Errors::EnvError if ENV["sessionid"].nil?
|
6
4
|
args = InstagramCrawler::Parser::Args.new(ARGV)
|
5
|
+
raise InstagramCrawler::Errors::EnvError if ENV["sessionid"].nil?
|
7
6
|
InstagramCrawler::Logger.setting(args.log)
|
8
7
|
InstagramCrawler::Main.run
|
9
8
|
rescue => e
|
data/lib/instagram_crawler/config.rb
CHANGED
@@ -4,7 +4,8 @@ module InstagramCrawler
|
|
4
4
|
class << self
|
5
5
|
attr_reader :default_url, :user_name, :base_url, :base_path,
|
6
6
|
:log_path, :after_date, :parse_date
|
7
|
-
attr_accessor :download
|
7
|
+
attr_accessor :download, :proxyname
|
8
|
+
attr_writer :port
|
8
9
|
|
9
10
|
def user_name=(user_name)
|
10
11
|
@user_name = user_name
|
@@ -17,6 +18,10 @@ module InstagramCrawler
|
|
17
18
|
@after_date = after_date
|
18
19
|
@parse_date = Time.parse(after_date).to_i
|
19
20
|
end
|
21
|
+
|
22
|
+
def port
|
23
|
+
@port ? @port.to_i : 8080
|
24
|
+
end
|
20
25
|
end
|
21
26
|
end
|
22
27
|
end
|
data/lib/instagram_crawler/file.rb
CHANGED
@@ -25,7 +25,8 @@ module InstagramCrawler
|
|
25
25
|
private
|
26
26
|
|
27
27
|
def get_binary_data(url)
|
28
|
-
res = HTTP.get(url)
|
28
|
+
res = Config.proxyname ?
|
29
|
+
HTTP.via(Config.proxyname, Config.port).get(url) : HTTP.get(url)
|
29
30
|
raise Errors::HttpError, "#{res.code} #{res.reason}" if res.code != 200
|
30
31
|
res.to_s
|
31
32
|
end
|
data/lib/instagram_crawler/parser/args.rb
CHANGED
@@ -20,6 +20,8 @@ module InstagramCrawler
|
|
20
20
|
opts.on('-d', '--download', 'Download files') { |download| Config.download = true }
|
21
21
|
opts.on('-a', '--after DATE', 'Download files after this date (YYYYMMDD)') { |after_date| Config.after_date = after_date }
|
22
22
|
opts.on('-l', '--log', 'Generate a log file in the current directory') { self.log = true }
|
23
|
+
opts.on('-P', '--proxyname PROXYNAME', 'Specify proxyname of your proxy server') { |proxyname| Config.proxyname = proxyname }
|
24
|
+
opts.on('-p', '--port PORT', 'Specify port of your proxy server (default port: 8080)') { |port| Config.port = port }
|
23
25
|
opts.on('-v', '--version', 'Show the instagram-crawler version') { puts("instagram-crawler #{InstagramCrawler::VERSION}"); exit }
|
24
26
|
opts.on('-h', '--help', 'Show this message') { puts(opts); exit }
|
25
27
|
opts.parse!(@args)
|
data/lib/instagram_crawler/parser/html.rb
CHANGED
@@ -70,7 +70,8 @@ module InstagramCrawler
|
|
70
70
|
end
|
71
71
|
|
72
72
|
def get_html(url)
|
73
|
-
res = HTTP.get(url)
|
73
|
+
res = Config.proxyname ?
|
74
|
+
HTTP.via(Config.proxyname, Config.port).get(url) : HTTP.get(url)
|
74
75
|
raise Errors::HttpError, "#{res.code} #{res.reason}" if res.code != 200
|
75
76
|
res.to_s
|
76
77
|
end
|
data/lib/instagram_crawler/parser/json.rb
CHANGED
@@ -47,7 +47,9 @@ module InstagramCrawler
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def get_json(url)
|
50
|
-
res = HTTP.cookies(sessionid: ENV["sessionid"]).get(url)
50
|
+
http = HTTP.cookies(sessionid: ENV["sessionid"])
|
51
|
+
res = Config.proxyname ?
|
52
|
+
http.via(Config.proxyname, Config.port).get(url) : http.get(url)
|
51
53
|
raise Errors::HttpError, "#{res.code} #{res.reason}" if res.code != 200
|
52
54
|
res.to_s
|
53
55
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: instagram-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Leon Ji
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-12-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -105,6 +105,7 @@ files:
|
|
105
105
|
- ".gitignore"
|
106
106
|
- ".rspec"
|
107
107
|
- ".travis.yml"
|
108
|
+
- Dockerfile
|
108
109
|
- Gemfile
|
109
110
|
- Gemfile.lock
|
110
111
|
- LICENSE.txt
|