instagram-crawler 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -2
- data/Dockerfile +7 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +19 -1
- data/README.md +32 -0
- data/bin/instagram-crawler +1 -2
- data/lib/instagram_crawler/config.rb +6 -1
- data/lib/instagram_crawler/file.rb +2 -1
- data/lib/instagram_crawler/parser/args.rb +2 -0
- data/lib/instagram_crawler/parser/html.rb +2 -1
- data/lib/instagram_crawler/parser/json.rb +3 -1
- data/lib/instagram_crawler/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5bbb0b173538ea3c0c43142545fe645f5651e59733f6286b40e97df437b66619
+  data.tar.gz: 1626445eb2a4a8e64e64373c3fa4099d046c0d7fbf5c62057dfbe440d438ca59
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4b48b5098fa06a70e85a0ccb6540aa428cc373f880b493be97f7ea3535547cd3ce2ce4f83605bc515010af1f9d1287729d03e6cfb3140461350d7fa9541b4e4e
+  data.tar.gz: 67ee453b7b5308796236ae2f758043e64277fc738b45e8a206a012230079ebc329ec87867f4e216547745bb56aadbca848120f27c2458d555c33bd0e1c327273
data/.travis.yml
CHANGED
data/Dockerfile
ADDED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    instagram-crawler (0.
+    instagram-crawler (0.2.0)
       colorize (~> 0.8)
       http (~> 4.0)
       nokogiri (~> 1.8)
@@ -12,7 +12,14 @@ GEM
     addressable (2.5.2)
       public_suffix (>= 2.0.2, < 4.0)
     colorize (0.8.1)
+    coveralls (0.8.22)
+      json (>= 1.8, < 3)
+      simplecov (~> 0.16.1)
+      term-ansicolor (~> 1.3)
+      thor (~> 0.19.4)
+      tins (~> 1.6)
     diff-lcs (1.3)
+    docile (1.3.1)
     domain_name (0.5.20180417)
       unf (>= 0.0.5, < 1.0.0)
     http (4.0.0)
@@ -24,6 +31,7 @@ GEM
       domain_name (~> 0.5)
     http-form_data (2.1.1)
     http_parser.rb (0.6.0)
+    json (2.1.0)
     mini_portile2 (2.3.0)
     nokogiri (1.8.5)
       mini_portile2 (~> 2.3.0)
@@ -42,6 +50,15 @@ GEM
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.8.0)
     rspec-support (3.8.0)
+    simplecov (0.16.1)
+      docile (~> 1.1)
+      json (>= 1.8, < 3)
+      simplecov-html (~> 0.10.0)
+    simplecov-html (0.10.2)
+    term-ansicolor (1.7.0)
+      tins (~> 1.0)
+    thor (0.19.4)
+    tins (1.20.2)
     unf (0.1.4)
       unf_ext
     unf_ext (0.0.7.5)
@@ -51,6 +68,7 @@ PLATFORMS

 DEPENDENCIES
   bundler (~> 1.17)
+  coveralls
   instagram-crawler!
   rake (~> 10.0)
   rspec (~> 3.0)
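The new development-only entries (coveralls, simplecov, docile, json, simplecov-html, term-ansicolor, thor, tins) are the usual footprint of coverage reporting being wired into the test suite. The spec files are not part of this diff, so the snippet below is only a sketch of how such a pairing is typically enabled, assuming a conventional spec/spec_helper.rb:

```ruby
# spec/spec_helper.rb (hypothetical; not included in this diff)
# Coveralls.wear! starts SimpleCov with the Coveralls formatter, which is
# what the coveralls/simplecov pair added to the lockfile above provides.
require 'coveralls'
Coveralls.wear!

require 'instagram_crawler'
```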
data/README.md
CHANGED
@@ -1,5 +1,13 @@
 # Instagram Crawler

+[](https://badge.fury.io/rb/instagram-crawler)
+[](https://codeclimate.com/github/mgleon08/instagram-crawler/maintainability)
+[](https://travis-ci.org/mgleon08/instagram-crawler)
+[](https://coveralls.io/github/mgleon08/instagram-crawler?branch=master)
+[](https://hakiri.io/github/mgleon08/instagram-crawler/master)
+[](https://github.com/mgleon08/instagram-crawler/blob/master/LICENSE.txt)
+
+
 > The easiest way to download instagram photos, posts and videos.

 <img src="screenshots/logo.png" width="200" align="center">
@@ -48,6 +56,14 @@ instagram-crawler -u <user_name> -d -a 20181120
 instagram-crawler -u <user_name> -l
 ```

+### Proxy
+
+`-P || --proxyname ` `-p || --port`
+
+```ruby
+instagram-crawler -u <user_name> -P http://example.com -p 1234
+```
+
 ### Help

 `instagram-crawler -h | --help`
@@ -62,10 +78,26 @@ options:
     -d, --download        Download files
     -a, --after DATE      Download files after this date (YYYYMMDD)
     -l, --log             Generate a log file in the current directory
+    -P, --proxyname PROXYNAME   Specify proxyname of your proxy server
+    -p, --port PORT             Specify port of your proxy server (default port: 8080)
     -v, --version         Show the instagram-crawler version
     -h, --help            Show this message
 ```

+## Docker
+
+```docker
+# make sure already setting env variable
+# you can setting sessionid in local use $sessionid or pass sessionid to docker
+# $PWD/instagram-crawler is file store path
+
+# pull image
+docker pull mgleon08/instagram-crawler
+
+# docker run
+docker run -it --rm -v $PWD/instagram-crawler:/instagram-crawler -e sessionid=$sessionid --name marvel mgleon08/instagram-crawler -u marvel -a 20181124 -d -l
+```
+
 ## Contributing

 Bug reports and pull requests are welcome on GitHub at [`https://github.com/mgleon08/instagram-crawler/pulls`](https://github.com/mgleon08/instagram-crawler/pulls)
data/bin/instagram-crawler
CHANGED
@@ -1,9 +1,8 @@
 #!/usr/bin/env ruby
 require_relative '../lib/instagram_crawler'
-
 begin
-  raise InstagramCrawler::Errors::EnvError if ENV["sessionid"].nil?
   args = InstagramCrawler::Parser::Args.new(ARGV)
+  raise InstagramCrawler::Errors::EnvError if ENV["sessionid"].nil?
   InstagramCrawler::Logger.setting(args.log)
   InstagramCrawler::Main.run
 rescue => e
data/lib/instagram_crawler/config.rb
CHANGED
@@ -4,7 +4,8 @@ module InstagramCrawler
     class << self
       attr_reader :default_url, :user_name, :base_url, :base_path,
                   :log_path, :after_date, :parse_date
-      attr_accessor :download
+      attr_accessor :download, :proxyname
+      attr_writer :port

       def user_name=(user_name)
         @user_name = user_name
@@ -17,6 +18,10 @@ module InstagramCrawler
        @after_date = after_date
        @parse_date = Time.parse(after_date).to_i
      end
+
+      def port
+        @port ? @port.to_i : 8080
+      end
     end
   end
 end
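The config change exposes proxyname as a read/write attribute and adds a port writer whose reader coerces the stored value and falls back to 8080 when no port was given. A minimal standalone sketch of that behavior (names mirror the diff; the real code lives in lib/instagram_crawler/config.rb):

```ruby
# Self-contained reproduction of the new proxy settings on Config.
module InstagramCrawler
  class Config
    class << self
      attr_accessor :proxyname
      attr_writer :port

      # The CLI hands the port over as a String; default to 8080 when unset.
      def port
        @port ? @port.to_i : 8080
      end
    end
  end
end

InstagramCrawler::Config.port            # => 8080
InstagramCrawler::Config.port = "1234"
InstagramCrawler::Config.port            # => 1234
```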
data/lib/instagram_crawler/file.rb
CHANGED
@@ -25,7 +25,8 @@ module InstagramCrawler
     private

     def get_binary_data(url)
-      res =
+      res = Config.proxyname ?
+        HTTP.via(Config.proxyname, Config.port).get(url) : HTTP.get(url)
       raise Errors::HttpError, "#{res.code} #{res.reason}" if res.code != 200
       res.to_s
     end
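get_binary_data (and get_html / get_json below) now branch on Config.proxyname and route the request through the http gem's proxy support. A standalone sketch of that call pattern, with placeholder proxy values:

```ruby
require 'http'  # the http (httprb) gem, ~> 4.0 per the lockfile above

proxyname = "proxy.example.com"  # placeholder; nil would skip the proxy
port      = 8080

url = "https://www.instagram.com/"
# HTTP.via(host, port) sends the request through the given proxy;
# without a proxy the plain HTTP.get is used, exactly as in the diff.
res = proxyname ? HTTP.via(proxyname, port).get(url) : HTTP.get(url)
raise "#{res.code} #{res.reason}" if res.code != 200
puts res.to_s.bytesize
```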
data/lib/instagram_crawler/parser/args.rb
CHANGED
@@ -20,6 +20,8 @@ module InstagramCrawler
       opts.on('-d', '--download', 'Download files') { |download| Config.download = true }
       opts.on('-a', '--after DATE', 'Download files after this date (YYYYMMDD)') { |after_date| Config.after_date = after_date }
       opts.on('-l', '--log', 'Generate a log file in the current directory') { self.log = true }
+      opts.on('-P', '--proxyname PROXYNAME', 'Specify proxyname of your proxy server') { |proxyname| Config.proxyname = proxyname }
+      opts.on('-p', '--port PORT', 'Specify port of your proxy server (default port: 8080)') { |port| Config.port = port }
       opts.on('-v', '--version', 'Show the instagram-crawler version') { puts("instagram-crawler #{InstagramCrawler::VERSION}"); exit }
       opts.on('-h', '--help', 'Show this message') { puts(opts); exit }
       opts.parse!(@args)
data/lib/instagram_crawler/parser/html.rb
CHANGED
@@ -70,7 +70,8 @@ module InstagramCrawler
       end

       def get_html(url)
-        res =
+        res = Config.proxyname ?
+          HTTP.via(Config.proxyname, Config.port).get(url) : HTTP.get(url)
         raise Errors::HttpError, "#{res.code} #{res.reason}" if res.code != 200
         res.to_s
       end
data/lib/instagram_crawler/parser/json.rb
CHANGED
@@ -47,7 +47,9 @@ module InstagramCrawler
       end

       def get_json(url)
-
+        http = HTTP.cookies(sessionid: ENV["sessionid"])
+        res = Config.proxyname ?
+          http.via(Config.proxyname, Config.port).get(url) : http.get(url)
         raise Errors::HttpError, "#{res.code} #{res.reason}" if res.code != 200
         res.to_s
       end
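get_json additionally threads the sessionid cookie through the same proxy-aware client. A sketch of the chained call, with placeholder URL and proxy values:

```ruby
require 'http'

sessionid = ENV.fetch("sessionid")   # the crawler requires this env variable
proxyname = ENV["proxyname"]         # placeholder; nil means a direct connection
port      = (ENV["port"] || 8080).to_i

url  = "https://www.instagram.com/"  # placeholder; the real endpoint comes from the parser
# cookies(...) and via(...) are chainable on the http gem's client,
# mirroring the branch introduced in the diff above.
http = HTTP.cookies(sessionid: sessionid)
res  = proxyname ? http.via(proxyname, port).get(url) : http.get(url)
raise "#{res.code} #{res.reason}" if res.code != 200
puts res.to_s[0, 200]
```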
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: instagram-crawler
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.2.0
 platform: ruby
 authors:
 - Leon Ji
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-
+date: 2018-12-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -105,6 +105,7 @@ files:
 - ".gitignore"
 - ".rspec"
 - ".travis.yml"
+- Dockerfile
 - Gemfile
 - Gemfile.lock
 - LICENSE.txt