instagram-crawler 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +18 -0
- data/lib/instagram_crawler/config.rb +7 -2
- data/lib/instagram_crawler/parser/args.rb +1 -0
- data/lib/instagram_crawler/parser/base.rb +2 -2
- data/lib/instagram_crawler/parser/html.rb +2 -1
- data/lib/instagram_crawler/parser/json.rb +2 -1
- data/lib/instagram_crawler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d477137a184a3ac845344d26e54118733ea6d2f0e3bf3aacbd743b5f11db5e04
|
4
|
+
data.tar.gz: e1f145d7032addca16e3eba2654e35a7348a45e8e99c1a4c4d380d784cd83940
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 65893d3b488960667d1ea31157a74e5785eae92550fcac8ae6cd28f6b6203bba12e87d04a9d0456ff4e2d86f8a3494f3dac05456d1a92c2111f44816828463df
|
7
|
+
data.tar.gz: 576b2455b75c6317ffbd9a6e5973e1c4ebbf362825946860d39fcc14f12176ab46724709814e61b625969f05aa50d6e4b0cc8c74962e6f76aaccc6f2c8f81c71
|
data/README.md
CHANGED
@@ -48,6 +48,14 @@ instagram-crawler -u <user_name>
|
|
48
48
|
instagram-crawler -u <user_name> -d -a 20181120
|
49
49
|
```
|
50
50
|
|
51
|
+
### Download files before this date (YYYYMMDD)
|
52
|
+
|
53
|
+
`-b || --before `
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
instagram-crawler -u <user_name> -d -b 20181120
|
57
|
+
```
|
58
|
+
|
51
59
|
### Generate log file
|
52
60
|
|
53
61
|
`-l || --log `
|
@@ -77,6 +85,7 @@ options:
|
|
77
85
|
-u, --username USERNAME Instagram username
|
78
86
|
-d, --download Download files
|
79
87
|
-a, --after DATE Download files after this date (YYYYMMDD)
|
88
|
+
-b, --before DATE Download files before this date (YYYYMMDD)
|
80
89
|
-l, --log Generate a log file in the current directory
|
81
90
|
-P, --proxyname PROXYNAME Specify proxyname of your proxy server
|
82
91
|
-p, --port PORT Specify port of your proxy server (default port: 8080)
|
@@ -98,6 +107,15 @@ docker pull mgleon08/instagram-crawler
|
|
98
107
|
docker run -it --rm -v $PWD/instagram-crawler:/instagram-crawler -e sessionid=$sessionid --name marvel mgleon08/instagram-crawler -u marvel -a 20181124 -d -l
|
99
108
|
```
|
100
109
|
|
110
|
+
|
111
|
+
## Terms of Use
|
112
|
+
|
113
|
+
[Instagram Terms of Use](https://www.instagram.com/about/legal/terms/before-january-19-2013/)
|
114
|
+
|
115
|
+
> 9.You must not access Instagram's private API by any other means other than the Instagram application itself.
|
116
|
+
10.You must not crawl, scrape, or otherwise cache any content from Instagram including but not limited to user profiles and photos.
|
117
|
+
|
118
|
+
|
101
119
|
## Contributing
|
102
120
|
|
103
121
|
Bug reports and pull requests are welcome on GitHub at [`https://github.com/mgleon08/instagram-crawler/pulls`](https://github.com/mgleon08/instagram-crawler/pulls)
|
data/lib/instagram_crawler/config.rb
CHANGED
@@ -3,7 +3,7 @@ module InstagramCrawler
|
|
3
3
|
@default_url = "https://www.instagram.com".freeze
|
4
4
|
class << self
|
5
5
|
attr_reader :default_url, :user_name, :base_url, :base_path,
|
6
|
-
:log_path, :after_date, :
|
6
|
+
:log_path, :after_date, :before_date, :parse_after_date, :parse_before_date
|
7
7
|
attr_accessor :download, :proxyname
|
8
8
|
attr_writer :port
|
9
9
|
|
@@ -16,7 +16,12 @@ module InstagramCrawler
|
|
16
16
|
|
17
17
|
def after_date=(after_date)
|
18
18
|
@after_date = after_date
|
19
|
-
@
|
19
|
+
@parse_after_date = Time.parse(after_date).to_i
|
20
|
+
end
|
21
|
+
|
22
|
+
def before_date=(before_date)
|
23
|
+
@before_date = before_date
|
24
|
+
@parse_before_date = Time.parse(before_date).to_i
|
20
25
|
end
|
21
26
|
|
22
27
|
def port
|
data/lib/instagram_crawler/parser/args.rb
CHANGED
@@ -19,6 +19,7 @@ module InstagramCrawler
|
|
19
19
|
opts.on('-u', '--username USERNAME', 'Instagram username') { |user_name| Config.user_name = user_name }
|
20
20
|
opts.on('-d', '--download', 'Download files') { |download| Config.download = true }
|
21
21
|
opts.on('-a', '--after DATE', 'Download files after this date (YYYYMMDD)') { |after_date| Config.after_date = after_date }
|
22
|
+
opts.on('-b', '--before DATE', 'Download files before this date (YYYYMMDD)') { |before_date| Config.before_date = before_date }
|
22
23
|
opts.on('-l', '--log', 'Generate a log file in the current directory') { self.log = true }
|
23
24
|
opts.on('-P', '--proxyname PROXYNAME', 'Specify proxyname of your proxy server') { |proxyname| Config.proxyname = proxyname }
|
24
25
|
opts.on('-p', '--port PORT', 'Specify port of your proxy server (default port: 8080)') { |port| Config.port = port }
|
data/lib/instagram_crawler/parser/base.rb
CHANGED
@@ -22,8 +22,8 @@ module InstagramCrawler
|
|
22
22
|
Time.at(ts).strftime('%Y-%m-%dT%H:%M')
|
23
23
|
end
|
24
24
|
|
25
|
-
def
|
26
|
-
if Config.after_date && (Config.
|
25
|
+
def check_after_time(time)
|
26
|
+
if Config.after_date && (Config.parse_after_date > time)
|
27
27
|
Logger.info "\nSuccess, the files after #{Config.after_date} have been downloaded!".light_green
|
28
28
|
exit
|
29
29
|
end
|
data/lib/instagram_crawler/parser/html.rb
CHANGED
@@ -45,7 +45,8 @@ module InstagramCrawler
|
|
45
45
|
def loop_edges(edges)
|
46
46
|
edges.each do |edge|
|
47
47
|
node = edge["node"]
|
48
|
-
|
48
|
+
next if Config.before_date && (Config.parse_before_date < node["taken_at_timestamp"])
|
49
|
+
check_after_time(node["taken_at_timestamp"])
|
49
50
|
time = parse_to_date(node["taken_at_timestamp"])
|
50
51
|
page_url = "https://www.instagram.com/p/#{node["shortcode"]}/"
|
51
52
|
|
data/lib/instagram_crawler/parser/json.rb
CHANGED
@@ -26,7 +26,8 @@ module InstagramCrawler
|
|
26
26
|
def loop_edges(edges)
|
27
27
|
edges.each do |edge|
|
28
28
|
node = edge["node"]
|
29
|
-
|
29
|
+
next if Config.before_date && (Config.parse_before_date < node["taken_at_timestamp"])
|
30
|
+
check_after_time(node["taken_at_timestamp"])
|
30
31
|
time = parse_to_date(node["taken_at_timestamp"])
|
31
32
|
|
32
33
|
if node["is_video"]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: instagram-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Leon Ji
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-04-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|