huginn_instagram_agent 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/huginn_instagram_agent/instagram_agent.rb +152 -0
- data/lib/huginn_instagram_agent.rb +4 -0
- data/spec/instagram_agent_spec.rb +13 -0
- metadata +105 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 645f7e41f1ac78754c549774a4fda0aaee43e8ab8de1e549fba9dc16dde64ebf
|
4
|
+
data.tar.gz: abcb3c6662524c4d74b3d7a3e8a5e627a64c794f89f44547d9223934b385e770
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4d52637e2616dccb2f056c7a980103b1a5311d769273cc5abf3223d826fb5fa88cedd02608d470568577f69abbd6c7f002d476018f7ad2eb2cbe254a3bea7c8f
|
7
|
+
data.tar.gz: e6287bf0fca1623de4147a10114fefbdadbf3ca6a17d6663bfaec5c8f4d3a8c2607752741effd116115e687ad760d6204f8d71c5f855c1390b0a9845fd1d8b90
|
@@ -0,0 +1,152 @@
|
|
1
|
+
module Agents
|
2
|
+
class InstagramAgent < Agent
|
3
|
+
# Class-level agent declarations: the dry-run macro, the default schedule and
# the Markdown description text registered for this agent.
can_dry_run!

default_schedule('every_1h')

description(<<-MD)
Monitor public Instagram accounts and creates an event for each post.

It can be scheduled to hit Instagram as much as you want but will obey
the `wait_between_refresh` for each account to avoid being banned.
If set to `0` it will refresh all accounts at every run.

Links generally expire after 24 hours but this agent will try to keep the
corresponding events updated so they can be used in a feed.
MD
|
17
|
+
|
18
|
+
|
19
|
+
# Options a freshly created agent starts with.
#
# @return [Hash] `:wait_between_refresh` (86400) and an empty
#   `:accounts_to_monitor` list
def default_options
  { wait_between_refresh: 86400, accounts_to_monitor: [] }
end
|
25
|
+
|
26
|
+
|
27
|
+
# Validates the agent options and normalizes the account names.
#
# * `wait_between_refresh` defaults to 86400 when missing and must convert to
#   an integer >= 0.
# * `accounts_to_monitor` must be an Array whose elements are all Strings;
#   leading `@` characters are stripped from every name.
#
# Problems are reported through `errors.add(:base, ...)`; this method does not
# raise for malformed `accounts_to_monitor` values.
def validate_options
  options['wait_between_refresh'] ||= 86400
  unless options['wait_between_refresh'].to_i >= 0
    errors.add(:base, "`wait_between_refresh` must be an integer >=0")
  end

  accounts = options['accounts_to_monitor']
  unless accounts.is_a?(Array) && accounts.all? { |name| name.is_a?(String) }
    errors.add(:base, "`accounts_to_monitor` must be an array of strings")
    # Stop here: normalizing a nil/String/mixed value would raise instead of
    # surfacing the validation error recorded above.
    return
  end

  # Build new strings rather than mutating in place (elements may be frozen),
  # and anchor with \A so only the very start of the name is stripped.
  options['accounts_to_monitor'] = accounts.map { |name| name.sub(/\A@+/, '') }
end
|
34
|
+
|
35
|
+
|
36
|
+
# Health indicator: false only when `memory['error']` is exactly `true`.
#
# @return [Boolean]
def working?
  errored = memory['error'] == true
  !errored
end
|
39
|
+
|
40
|
+
|
41
|
+
# Scheduled entry point: refreshes the accounts that are due.
#
# Clears the error flag in memory, then for every account returned by
# `accounts_to_refresh` records the fetch time, downloads its posts and, per
# post, either updates the matching event or creates a new one. Accounts for
# which `get_posts` returns nothing are skipped.
def check
  memory['error'] = nil

  accounts_to_refresh.each do |account|
    remember_fetching(account)

    fetched = get_posts(account)
    next unless fetched

    fetched.each do |post|
      next update_existing_event(post) if seen_before?(post)

      create_event(:payload => post)
    end
  end
end
|
59
|
+
|
60
|
+
|
61
|
+
# Downloads the public profile JSON of an Instagram account and returns its
# timeline posts.
#
# Every failure is logged through `error`, sets `memory['error']` to true and
# yields nil. That covers a non-successful HTTP status, an empty body, no posts
# found, and any exception raised while requesting or parsing the response
# (previously such exceptions escaped without setting the error flag).
#
# @param account [String] account name interpolated into the profile URL
# @return [Array, nil] the post nodes, or nil when the fetch failed
def get_posts(account)
  url = "https://www.instagram.com/#{account}/?__a=1&__d=di"

  response = HTTParty.get(url,
    :headers => {
      'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'
    }
  )

  unless response.success?
    return fetch_failed("[#{account}] Could not fetch #{url} - error #{response.code} | headers #{response.headers}")
  end

  json = response.parsed_response

  unless json
    return fetch_failed("[#{account}] Could not extract JSON from #{url} - raw #{response.parsed_response} | headers #{response.headers}")
  end

  posts = extract_posts(json)

  unless posts.any?
    return fetch_failed("[#{account}] Could not find any posts, strange - raw #{response.parsed_response} | headers #{response.headers} | json #{json}")
  end

  Array(posts).compact
rescue StandardError => e
  # Timeouts, connection errors, malformed bodies, ...: report them the same
  # way as the explicit failure branches above.
  fetch_failed("[#{account}] Could not fetch #{url} - #{e.class}: #{e.message}")
end

# Logs a fetch failure, flags the agent as errored and returns nil so callers
# can `return fetch_failed(...)` directly.
def fetch_failed(message)
  error(message)
  memory['error'] = true
  nil
end
private :fetch_failed
|
94
|
+
|
95
|
+
# Pulls the post nodes out of a parsed profile response.
#
# Reads `graphql.user.edge_owner_to_timeline_media.edges` and returns each
# edge's `node`. Any input that does not have that shape yields an empty array.
#
# @param json [Object] parsed response body
# @return [Array] post nodes, or [] when they cannot be extracted
def extract_posts(json)
  return [] unless json.is_a?(Hash)

  timeline_edges = json.dig('graphql', 'user', 'edge_owner_to_timeline_media', 'edges')
  timeline_edges.map { |edge| edge['node'] }
rescue StandardError
  []
end
|
103
|
+
|
104
|
+
|
105
|
+
private
|
106
|
+
|
107
|
+
|
108
|
+
# Lookup table from the `id` stored in each event payload to that event's own
# id, built from `events.all` on first use and memoized afterwards.
#
# @return [Hash] payload id => event id
def instagramid_to_eventid
  @instagramid_to_eventid ||= events.all.each_with_object({}) do |event, ids|
    ids[event.payload['id']] = event.id
  end
end
|
111
|
+
|
112
|
+
|
113
|
+
# Whether an event already exists for the given post.
#
# @param post [Hash] post node; only its `'id'` is consulted
# @return [Boolean]
def seen_before?(post)
  instagramid_to_eventid.key?(post['id'])
end
|
116
|
+
|
117
|
+
|
118
|
+
# Replaces the payload of the event previously created for this post and
# hands the event to `create_event`.
#
# @param post [Hash] post node whose `'id'` must be present in the lookup table
# @raise [KeyError] when no event is known for the post id
def update_existing_event(post)
  existing = events.find(instagramid_to_eventid.fetch(post['id']))
  existing.payload = post
  create_event(existing)
end
|
124
|
+
|
125
|
+
|
126
|
+
# The configured list of accounts, read from the interpolated options.
#
# @return [Object] whatever `interpolated['accounts_to_monitor']` holds
#   (an Array of account names once the options have passed validation)
def all_accounts
  interpolated['accounts_to_monitor']
end
|
129
|
+
|
130
|
+
|
131
|
+
# Accounts that are due for a refresh.
#
# An account is stale when no fetch time is recorded for it under
# `memory['last_fetched_at']`, or when the recorded time is older than
# `refresh_every` seconds.
#
# @param refresh_every [Integer] minimum number of seconds between fetches
# @return [Array] subset of `all_accounts`
def stale_accounts(refresh_every)
  all_accounts.select do |account|
    fetched_at = memory.dig('last_fetched_at', account)
    next true if fetched_at.nil?

    fetched_at < refresh_every.seconds.ago.to_i
  end
end
|
137
|
+
|
138
|
+
|
139
|
+
# Decides which accounts the current run should fetch.
#
# * `wait_between_refresh` of 0 (or less): every configured account, as the
#   agent description promises.
# * otherwise: at most one randomly chosen stale account, or none when no
#   account is stale.
#
# @return [Array] accounts to fetch during this run
def accounts_to_refresh
  refresh_every = interpolated['wait_between_refresh'].to_i

  # `to_i` always yields an Integer and 0 is truthy in Ruby, so the value must
  # be compared explicitly; a bare truthiness test never selects this branch.
  return Array(all_accounts) unless refresh_every > 0

  Array(stale_accounts(refresh_every).sample)
end
|
144
|
+
|
145
|
+
|
146
|
+
# Records the current time (epoch seconds) as the last fetch time of an
# account under `memory['last_fetched_at']`, creating that hash if needed.
#
# @param account [String] account name used as the key
# @return [Integer] the stored timestamp
def remember_fetching(account)
  fetch_times = (memory['last_fetched_at'] ||= {})
  fetch_times[account] = Time.now.to_i
end
|
150
|
+
|
151
|
+
end
|
152
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'rails_helper'
|
2
|
+
require 'huginn_agent/spec_helper'
|
3
|
+
|
4
|
+
# Spec skeleton for the Instagram agent: builds and saves an agent with the
# default options before each example. No real examples exist yet.
describe Agents::InstagramAgent do
  before do
    @valid_options = described_class.new.default_options
    @checker = described_class.new(name: "InstagramAgent", options: @valid_options)
    @checker.user = users(:bob)
    @checker.save!
  end

  pending "add specs here"
end
|
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: huginn_instagram_agent
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.4
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Alessio Signorini
|
8
|
+
- Víctor A. Rodríguez
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2022-12-19 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '1.7'
|
21
|
+
type: :development
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '1.7'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rake
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '12.3'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '12.3'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: huginn_agent
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0.6'
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0.6'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: httparty
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0.7'
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0.7'
|
70
|
+
description:
|
71
|
+
email:
|
72
|
+
- alessio@signorini.us
|
73
|
+
- victor@bit-man.guru
|
74
|
+
executables: []
|
75
|
+
extensions: []
|
76
|
+
extra_rdoc_files: []
|
77
|
+
files:
|
78
|
+
- lib/huginn_instagram_agent.rb
|
79
|
+
- lib/huginn_instagram_agent/instagram_agent.rb
|
80
|
+
- spec/instagram_agent_spec.rb
|
81
|
+
homepage: https://github.com/alessio-signorini/huginn-instagram-agent
|
82
|
+
licenses:
|
83
|
+
- MIT
|
84
|
+
metadata: {}
|
85
|
+
post_install_message:
|
86
|
+
rdoc_options: []
|
87
|
+
require_paths:
|
88
|
+
- lib
|
89
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
requirements: []
|
100
|
+
rubygems_version: 3.1.4
|
101
|
+
signing_key:
|
102
|
+
specification_version: 4
|
103
|
+
summary: Huginn Agent that monitors public Instagram accounts
|
104
|
+
test_files:
|
105
|
+
- spec/instagram_agent_spec.rb
|