huginn_instagram_agent 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/huginn_instagram_agent/instagram_agent.rb +152 -0
- data/lib/huginn_instagram_agent.rb +4 -0
- data/spec/instagram_agent_spec.rb +13 -0
- metadata +105 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 645f7e41f1ac78754c549774a4fda0aaee43e8ab8de1e549fba9dc16dde64ebf
|
4
|
+
data.tar.gz: abcb3c6662524c4d74b3d7a3e8a5e627a64c794f89f44547d9223934b385e770
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4d52637e2616dccb2f056c7a980103b1a5311d769273cc5abf3223d826fb5fa88cedd02608d470568577f69abbd6c7f002d476018f7ad2eb2cbe254a3bea7c8f
|
7
|
+
data.tar.gz: e6287bf0fca1623de4147a10114fefbdadbf3ca6a17d6663bfaec5c8f4d3a8c2607752741effd116115e687ad760d6204f8d71c5f855c1390b0a9845fd1d8b90
|
@@ -0,0 +1,152 @@
|
|
1
|
+
module Agents
|
2
|
+
class InstagramAgent < Agent
|
3
|
+
can_dry_run!
|
4
|
+
|
5
|
+
default_schedule 'every_1h'
|
6
|
+
|
7
|
+
description <<-MD
|
8
|
+
Monitor public Instagram accounts and creates an event for each post.
|
9
|
+
|
10
|
+
It can be scheduled to hit Instagram as much as you want but will obey
|
11
|
+
the `wait_between_refresh` for each account to avoid being banned.
|
12
|
+
If set to `0` it will refresh all accounts at every run.
|
13
|
+
|
14
|
+
Links generally expire after 24 hours but this agent will try to keep the
|
15
|
+
corresponding events updated so they can be used in a feed.
|
16
|
+
MD
|
17
|
+
|
18
|
+
|
19
|
+
# Options a freshly created agent starts with: a 24-hour (86,400 s) pause
# between refreshes of the same account and an empty list of accounts.
def default_options
  { wait_between_refresh: 86_400, accounts_to_monitor: [] }
end
|
25
|
+
|
26
|
+
|
27
|
+
# Validates the agent options and normalizes the account names.
#
# - `wait_between_refresh` defaults to 86400 when absent and must convert
#   (via `to_i`) to a non-negative integer.
# - `accounts_to_monitor` must be an Array whose entries are all Strings;
#   any leading `@` characters are stripped from each entry.
#
# Problems are reported through `errors.add(:base, ...)`; nothing is raised.
def validate_options
  options['wait_between_refresh'] ||= 86400
  unless options['wait_between_refresh'].to_i >= 0
    errors.add(:base, "`wait_between_refresh` must be an integer >=0")
  end

  accounts = options['accounts_to_monitor']
  unless accounts.is_a?(Array) && accounts.all? { |name| name.is_a?(String) }
    errors.add(:base, "`accounts_to_monitor` must be an array of strings")
    # Stop here: iterating a nil/non-array value (or calling String methods
    # on non-strings) would raise instead of reporting a validation error.
    return
  end

  # Build new strings instead of mutating in place so frozen names are fine.
  # \A anchors at the start of the whole string (not of each line).
  options['accounts_to_monitor'] = accounts.map { |name| name.sub(/\A@+/, '') }
end
|
34
|
+
|
35
|
+
|
36
|
+
# The agent is considered working unless the error flag in memory is
# exactly `true` (set by get_posts when a fetch or extraction fails).
def working?
  flagged = memory['error'] == true
  !flagged
end
|
39
|
+
|
40
|
+
|
41
|
+
# Scheduled entry point. Clears the error flag, then for every account that
# is due for a refresh: records the fetch time, downloads its posts, and
# either updates the event already stored for a post or creates a new one.
# Accounts whose fetch returned nothing are skipped.
def check
  memory['error'] = nil

  accounts_to_refresh.each do |handle|
    remember_fetching(handle)

    fetched = get_posts(handle)
    next unless fetched

    fetched.each do |item|
      seen_before?(item) ? update_existing_event(item) : create_event(payload: item)
    end
  end
end
|
59
|
+
|
60
|
+
|
61
|
+
# Downloads the profile JSON for `account` and returns its posts.
#
# Returns an Array of post hashes (nil entries removed), or nil when the
# HTTP request is unsuccessful, the response has no parsed body, or no
# posts could be extracted. Every failure is logged through `error` and
# sets memory['error'] to true.
def get_posts(account)
  url = "https://www.instagram.com/#{account}/?__a=1&__d=di"

  # Common failure path: log the message, raise the error flag, yield nil.
  give_up = lambda do |message|
    error(message)
    memory['error'] = true
    nil
  end

  # A desktop-browser User-Agent is sent with the request.
  response = HTTParty.get(
    url,
    headers: {
      'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'
    }
  )
  return give_up.call("[#{account}] Could not fetch #{url} - error #{response.code} | headers #{response.headers}") unless response.success?

  json = response.parsed_response
  return give_up.call("[#{account}] Could not extract JSON from #{url} - raw #{response.parsed_response} | headers #{response.headers}") unless json

  posts = extract_posts(json)
  return give_up.call("[#{account}] Could not find any posts, strange - raw #{response.parsed_response} | headers #{response.headers} | json #{json}") if posts.none?

  Array(posts).compact
end
|
94
|
+
|
95
|
+
# Extracts the post nodes from a parsed profile response.
#
# Walks json['graphql']['user']['edge_owner_to_timeline_media']['edges'] and
# collects each edge's 'node'.
#
# Returns an Array of nodes. Returns [] when `json` is not a Hash, when any
# level of the path is missing or has an unexpected type, or when 'edges' is
# not an Array. Edges that are not Hashes (or have no 'node') are skipped
# rather than discarding every other post.
def extract_posts(json)
  return [] unless json.is_a?(Hash)

  edges = json.dig('graphql', 'user', 'edge_owner_to_timeline_media', 'edges')
  return [] unless edges.is_a?(Array)

  edges.map { |edge| edge['node'] if edge.is_a?(Hash) }.compact
rescue TypeError
  # Hash#dig raises TypeError when an intermediate value cannot be dug into
  # (e.g. a String where a Hash was expected); treat that as "no posts".
  []
end
|
103
|
+
|
104
|
+
|
105
|
+
private
|
106
|
+
|
107
|
+
|
108
|
+
# Memoized lookup table from a post's 'id' (as stored in each event's
# payload) to the id of the event holding it. Built from all of this
# agent's events the first time it is needed.
def instagramid_to_eventid
  @instagramid_to_eventid ||= events.all.each_with_object({}) do |event, lookup|
    lookup[event.payload['id']] = event.id
  end
end
|
111
|
+
|
112
|
+
|
113
|
+
# True when an event already exists whose payload carries this post's 'id'.
def seen_before?(post)
  known_ids = instagramid_to_eventid
  known_ids.key?(post['id'])
end
|
116
|
+
|
117
|
+
|
118
|
+
# Replaces the payload of the event already stored for this post with the
# freshly fetched data and hands the event to `create_event`.
# Raises KeyError if no event is known for the post's 'id'.
def update_existing_event(post)
  stored = events.find(instagramid_to_eventid.fetch(post['id']))
  stored.payload = post
  create_event(stored)
end
|
124
|
+
|
125
|
+
|
126
|
+
# Account names configured in `accounts_to_monitor` (after interpolation).
# Always returns an Array: a missing option yields [] and a single value is
# wrapped, so callers can iterate without nil checks.
def all_accounts
  Array(interpolated['accounts_to_monitor'])
end
|
129
|
+
|
130
|
+
|
131
|
+
# Accounts that are due for a refresh.
#
# refresh_every - minimum number of seconds between two fetches of the
#                 same account.
#
# Returns the configured accounts that were never fetched, or whose last
# fetch time in memory['last_fetched_at'] is older than `refresh_every`
# seconds.
def stale_accounts(refresh_every)
  # Compute the cutoff once instead of once per account. Epoch seconds from
  # Time.now match the timestamps written by remember_fetching.
  cutoff = Time.now.to_i - refresh_every

  all_accounts.select do |account|
    last_fetched_at = memory.dig('last_fetched_at', account)
    last_fetched_at.nil? || last_fetched_at < cutoff
  end
end
|
137
|
+
|
138
|
+
|
139
|
+
# Accounts to fetch during this run.
#
# With a positive `wait_between_refresh`, at most one randomly chosen stale
# account is returned per run. With `0` every configured account is
# returned. (In Ruby `0` is truthy, so this must be an explicit numeric
# comparison rather than a truthiness check.)
#
# Always returns an Array (possibly empty).
def accounts_to_refresh
  refresh_every = interpolated['wait_between_refresh'].to_i

  return Array(all_accounts) unless refresh_every > 0

  # `sample` returns nil when nothing is stale; Array(nil) turns that into [].
  Array(stale_accounts(refresh_every).sample)
end
|
144
|
+
|
145
|
+
|
146
|
+
# Records the current time (epoch seconds) as the last fetch time of
# `account` under memory['last_fetched_at'], creating that table on first
# use. Returns the stored timestamp.
def remember_fetching(account)
  memory['last_fetched_at'] = {} unless memory['last_fetched_at']
  # Write through `memory` (not a local reference) so the stored table is
  # the one that gets updated.
  memory['last_fetched_at'][account] = Time.now.to_i
end
|
150
|
+
|
151
|
+
end
|
152
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'rails_helper'
|
2
|
+
require 'huginn_agent/spec_helper'
|
3
|
+
|
4
|
+
# Spec skeleton: builds and saves an agent with the default options before
# each example. No real examples exist yet.
describe Agents::InstagramAgent do
  before do
    @valid_options = Agents::InstagramAgent.new.default_options
    @checker = Agents::InstagramAgent.new(name: "InstagramAgent", options: @valid_options)
    @checker.user = users(:bob)
    @checker.save!
  end

  pending "add specs here"
end
|
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: huginn_instagram_agent
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.4
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Alessio Signorini
|
8
|
+
- Víctor A. Rodríguez
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2022-12-19 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '1.7'
|
21
|
+
type: :development
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '1.7'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rake
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '12.3'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '12.3'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: huginn_agent
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0.6'
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0.6'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: httparty
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0.7'
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0.7'
|
70
|
+
description:
|
71
|
+
email:
|
72
|
+
- alessio@signorini.us
|
73
|
+
- victor@bit-man.guru
|
74
|
+
executables: []
|
75
|
+
extensions: []
|
76
|
+
extra_rdoc_files: []
|
77
|
+
files:
|
78
|
+
- lib/huginn_instagram_agent.rb
|
79
|
+
- lib/huginn_instagram_agent/instagram_agent.rb
|
80
|
+
- spec/instagram_agent_spec.rb
|
81
|
+
homepage: https://github.com/alessio-signorini/huginn-instagram-agent
|
82
|
+
licenses:
|
83
|
+
- MIT
|
84
|
+
metadata: {}
|
85
|
+
post_install_message:
|
86
|
+
rdoc_options: []
|
87
|
+
require_paths:
|
88
|
+
- lib
|
89
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
requirements: []
|
100
|
+
rubygems_version: 3.1.4
|
101
|
+
signing_key:
|
102
|
+
specification_version: 4
|
103
|
+
summary: Huginn Agent that monitors public Instagram accounts
|
104
|
+
test_files:
|
105
|
+
- spec/instagram_agent_spec.rb
|