instagram-crawler 0.2.1 → 0.3.0
- checksums.yaml +4 -4
- data/README.md +18 -0
- data/lib/instagram_crawler/config.rb +7 -2
- data/lib/instagram_crawler/parser/args.rb +1 -0
- data/lib/instagram_crawler/parser/base.rb +2 -2
- data/lib/instagram_crawler/parser/html.rb +2 -1
- data/lib/instagram_crawler/parser/json.rb +2 -1
- data/lib/instagram_crawler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d477137a184a3ac845344d26e54118733ea6d2f0e3bf3aacbd743b5f11db5e04
+  data.tar.gz: e1f145d7032addca16e3eba2654e35a7348a45e8e99c1a4c4d380d784cd83940
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 65893d3b488960667d1ea31157a74e5785eae92550fcac8ae6cd28f6b6203bba12e87d04a9d0456ff4e2d86f8a3494f3dac05456d1a92c2111f44816828463df
+  data.tar.gz: 576b2455b75c6317ffbd9a6e5973e1c4ebbf362825946860d39fcc14f12176ab46724709814e61b625969f05aa50d6e4b0cc8c74962e6f76aaccc6f2c8f81c71
data/README.md
CHANGED
@@ -48,6 +48,14 @@ instagram-crawler -u <user_name>
 instagram-crawler -u <user_name> -d -a 20181120
 ```
 
+### Download files before this date (YYYYMMDD)
+
+`-b || --before `
+
+```ruby
+instagram-crawler -u <user_name> -d -b 20181120
+```
+
 ### Generate log file
 
 `-l || --log `
@@ -77,6 +85,7 @@ options:
     -u, --username USERNAME          Instagram username
     -d, --download                   Download files
     -a, --after DATE                 Download files after this date (YYYYMMDD)
+    -b, --before DATE                Download files before this date (YYYYMMDD)
     -l, --log                        Generate a log file in the current directory
     -P, --proxyname PROXYNAME        Specify proxyname of your proxy server
     -p, --port PORT                  Specify port of your proxy server (default port: 8080)
@@ -98,6 +107,15 @@ docker pull mgleon08/instagram-crawler
 docker run -it --rm -v $PWD/instagram-crawler:/instagram-crawler -e sessionid=$sessionid --name marvel mgleon08/instagram-crawler -u marvel -a 20181124 -d -l
 ```
 
+
+## Terms of Use
+
+[Instagram Terms of Use](https://www.instagram.com/about/legal/terms/before-january-19-2013/)
+
+> 9.You must not access Instagram's private API by any other means other than the Instagram application itself.
+10.You must not crawl, scrape, or otherwise cache any content from Instagram including but not limited to user profiles and photos.
+
+
 ## Contributing
 
 Bug reports and pull requests are welcome on GitHub at [`https://github.com/mgleon08/instagram-crawler/pulls`](https://github.com/mgleon08/instagram-crawler/pulls)
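Taken together, the new option bounds the crawl from above while the existing `-a/--after` bounds it from below: `-b` skips posts newer than the given date, and `-a` stops the crawl once older posts are reached. Assuming the two flags compose (the parser changes below apply both filters in the same loop), a hypothetical bounded run would look like:

```ruby
# hypothetical invocation combining both date filters; not taken from the README
instagram-crawler -u <user_name> -d -a 20181101 -b 20181120
```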
data/lib/instagram_crawler/config.rb
CHANGED
@@ -3,7 +3,7 @@ module InstagramCrawler
     @default_url = "https://www.instagram.com".freeze
     class << self
       attr_reader :default_url, :user_name, :base_url, :base_path,
-                  :log_path, :after_date, :
+                  :log_path, :after_date, :before_date, :parse_after_date, :parse_before_date
       attr_accessor :download, :proxyname
       attr_writer :port
 
@@ -16,7 +16,12 @@ module InstagramCrawler
 
       def after_date=(after_date)
         @after_date = after_date
-        @
+        @parse_after_date = Time.parse(after_date).to_i
+      end
+
+      def before_date=(before_date)
+        @before_date = before_date
+        @parse_before_date = Time.parse(before_date).to_i
       end
 
       def port
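Both setters keep the raw `YYYYMMDD` string (used in log messages) and its epoch-seconds equivalent (used for timestamp comparisons). A minimal standalone sketch of that conversion, outside the gem's `Config` class:

```ruby
require 'time'

# Stand-in values, not the gem's Config: a YYYYMMDD string is parsed once
# and kept as an integer so it can be compared against "taken_at_timestamp".
before_date       = '20181120'
parse_before_date = Time.parse(before_date).to_i  # seconds since the epoch at 2018-11-20 00:00 local time

post_taken_at = Time.parse('20181125').to_i
puts parse_before_date < post_taken_at            # => true, so -b 20181120 would skip this post
```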
data/lib/instagram_crawler/parser/args.rb
CHANGED
@@ -19,6 +19,7 @@ module InstagramCrawler
       opts.on('-u', '--username USERNAME', 'Instagram username') { |user_name| Config.user_name = user_name }
       opts.on('-d', '--download', 'Download files') { |download| Config.download = true }
       opts.on('-a', '--after DATE', 'Download files after this date (YYYYMMDD)') { |after_date| Config.after_date = after_date }
+      opts.on('-b', '--before DATE', 'Download files before this date (YYYYMMDD)') { |before_date| Config.before_date = before_date }
       opts.on('-l', '--log', 'Generate a log file in the current directory') { self.log = true }
       opts.on('-P', '--proxyname PROXYNAME', 'Specify proxyname of your proxy server') { |proxyname| Config.proxyname = proxyname }
       opts.on('-p', '--port PORT', 'Specify port of your proxy server (default port: 8080)') { |port| Config.port = port }
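The new `-b` switch is wired exactly like `-a`: OptionParser hands the raw argument to the corresponding `Config` setter. A self-contained sketch of that pattern, using a stand-in config object rather than the gem's classes:

```ruby
require 'optparse'

# Stand-in for InstagramCrawler::Config, just to show the option wiring.
config = Struct.new(:after_date, :before_date).new

OptionParser.new do |opts|
  opts.on('-a', '--after DATE',  'Download files after this date (YYYYMMDD)')  { |date| config.after_date  = date }
  opts.on('-b', '--before DATE', 'Download files before this date (YYYYMMDD)') { |date| config.before_date = date }
end.parse!(%w[-a 20181101 -b 20181120])

p config.after_date   # => "20181101"
p config.before_date  # => "20181120"
```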
data/lib/instagram_crawler/parser/base.rb
CHANGED
@@ -22,8 +22,8 @@ module InstagramCrawler
       Time.at(ts).strftime('%Y-%m-%dT%H:%M')
     end
 
-    def
-      if Config.after_date && (Config.
+    def check_after_time(time)
+      if Config.after_date && (Config.parse_after_date > time)
         Logger.info "\nSuccess, the files after #{Config.after_date} have been downloaded!".light_green
         exit
       end
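`check_after_time` compares the parsed `--after` bound against a post's timestamp and, on the first post that is too old, logs a success message and exits the whole process. That only makes sense if posts are iterated newest-first, which appears to be the assumption here. A simplified standalone sketch with stand-in constants and `puts` instead of the gem's Logger:

```ruby
require 'time'

PARSE_AFTER = Time.parse('20181120').to_i  # stand-in for Config.parse_after_date

def check_after_time(timestamp)
  if PARSE_AFTER > timestamp
    puts "Success, the files after 20181120 have been downloaded!"
    exit
  end
end

[Time.parse('20181125').to_i, Time.parse('20181118').to_i].each do |ts|
  check_after_time(ts)  # the second timestamp is older than --after, so the crawl stops here
  puts "downloading post taken at #{Time.at(ts)}"
end
```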
data/lib/instagram_crawler/parser/html.rb
CHANGED
@@ -45,7 +45,8 @@ module InstagramCrawler
     def loop_edges(edges)
       edges.each do |edge|
         node = edge["node"]
-
+        next if Config.before_date && (Config.parse_before_date < node["taken_at_timestamp"])
+        check_after_time(node["taken_at_timestamp"])
         time = parse_to_date(node["taken_at_timestamp"])
         page_url = "https://www.instagram.com/p/#{node["shortcode"]}/"
 
data/lib/instagram_crawler/parser/json.rb
CHANGED
@@ -26,7 +26,8 @@ module InstagramCrawler
     def loop_edges(edges)
       edges.each do |edge|
         node = edge["node"]
-
+        next if Config.before_date && (Config.parse_before_date < node["taken_at_timestamp"])
+        check_after_time(node["taken_at_timestamp"])
         time = parse_to_date(node["taken_at_timestamp"])
 
         if node["is_video"]
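Both parsers gain the same guard at the top of `loop_edges`: `next` skips any post newer than `--before`, and `check_after_time` halts once posts become older than `--after`, so only posts inside the window are downloaded (again assuming newest-first order). A simplified sketch of that filtering with plain hashes, using `break` where the gem calls `exit`:

```ruby
require 'time'

# Stand-ins for Config.parse_before_date / Config.parse_after_date (epoch seconds).
PARSE_BEFORE = Time.parse('20181120').to_i
PARSE_AFTER  = Time.parse('20181110').to_i

edges = [
  { "node" => { "taken_at_timestamp" => Time.parse('20181125').to_i } }, # too new: skipped
  { "node" => { "taken_at_timestamp" => Time.parse('20181115').to_i } }, # inside the window: downloaded
  { "node" => { "taken_at_timestamp" => Time.parse('20181105').to_i } }  # too old: crawl stops
]

edges.each do |edge|
  node = edge["node"]
  next  if PARSE_BEFORE < node["taken_at_timestamp"]  # --before filter
  break if PARSE_AFTER  > node["taken_at_timestamp"]  # --after filter (check_after_time exits here)
  puts "download post taken at #{Time.at(node['taken_at_timestamp'])}"
end
```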
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: instagram-crawler
 version: !ruby/object:Gem::Version
-  version: 0.2.1
+  version: 0.3.0
 platform: ruby
 authors:
 - Leon Ji
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-
+date: 2019-04-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler