huginn_instagram_agent 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 645f7e41f1ac78754c549774a4fda0aaee43e8ab8de1e549fba9dc16dde64ebf
4
+ data.tar.gz: abcb3c6662524c4d74b3d7a3e8a5e627a64c794f89f44547d9223934b385e770
5
+ SHA512:
6
+ metadata.gz: 4d52637e2616dccb2f056c7a980103b1a5311d769273cc5abf3223d826fb5fa88cedd02608d470568577f69abbd6c7f002d476018f7ad2eb2cbe254a3bea7c8f
7
+ data.tar.gz: e6287bf0fca1623de4147a10114fefbdadbf3ca6a17d6663bfaec5c8f4d3a8c2607752741effd116115e687ad760d6204f8d71c5f855c1390b0a9845fd1d8b90
@@ -0,0 +1,152 @@
1
module Agents
  # Huginn agent that polls public Instagram profiles and emits one event per
  # post. Posts whose `id` already belongs to one of this agent's events are
  # re-saved with the fresh payload instead of being emitted again.
  #
  # State kept in `memory`:
  # * `error`           - set to `true` when the last run hit a fetch/parse problem
  # * `last_fetched_at` - Hash of account name => epoch seconds of the last attempt
  class InstagramAgent < Agent
    # Default pause (in seconds) between two refreshes of the same account.
    DEFAULT_WAIT_BETWEEN_REFRESH = 86_400

    # Browser-like User-Agent sent with every profile request.
    USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'.freeze

    can_dry_run!

    default_schedule 'every_1h'

    description <<-MD
      Monitor public Instagram accounts and creates an event for each post.

      It can be scheduled to hit Instagram as much as you want but will obey
      the `wait_between_refresh` for each account to avoid being banned.
      If set to `0` it will refresh all accounts at every run.

      Links generally expire after 24 hours but this agent will try to keep the
      corresponding events updated so they can be used in a feed.
    MD

    # @return [Hash] options used when a new agent is created
    def default_options
      {
        wait_between_refresh: DEFAULT_WAIT_BETWEEN_REFRESH,
        accounts_to_monitor: []
      }
    end

    # Validates the options and normalises account names by stripping any
    # leading `@` characters.
    #
    # Adds an error (instead of raising) when `accounts_to_monitor` is missing,
    # is not an Array, or contains non-String entries.
    def validate_options
      options['wait_between_refresh'] ||= DEFAULT_WAIT_BETWEEN_REFRESH
      unless options['wait_between_refresh'].to_i >= 0
        errors.add(:base, "`wait_between_refresh` must be an integer >=0")
      end

      accounts = options['accounts_to_monitor']
      if accounts.is_a?(Array) && accounts.all? { |name| name.is_a?(String) }
        # Non-destructive `sub` so frozen strings are handled too; `\A` anchors
        # at the very start of the string rather than at any line start.
        options['accounts_to_monitor'] = accounts.map { |name| name.sub(/\A@+/, '') }
      else
        errors.add(:base, "`accounts_to_monitor` must be an array of strings")
      end
    end

    # @return [Boolean] false only when the last run recorded an error
    def working?
      memory['error'] != true
    end

    # Refreshes every account selected by #accounts_to_refresh: new posts become
    # events, already-seen posts update their existing event.
    def check
      memory['error'] = nil

      accounts_to_refresh.each do |account|
        # Recorded before fetching, so a failing account is also throttled.
        remember_fetching(account)

        posts = get_posts(account)
        next unless posts

        posts.each do |post|
          if seen_before?(post)
            update_existing_event(post)
          else
            create_event(payload: post)
          end
        end
      end
    end

    # Downloads the profile JSON of `account` and returns its posts.
    #
    # Every failure (non-success HTTP status, empty body, no posts, or an
    # exception raised while fetching/parsing) is logged, flagged in
    # `memory['error']` and reported as `nil`, so one broken account does not
    # abort the remaining ones.
    #
    # @param account [String] Instagram user name without the leading `@`
    # @return [Array, nil] the post nodes, or nil when nothing usable was fetched
    def get_posts(account)
      url = "https://www.instagram.com/#{account}/?__a=1&__d=di"

      response = HTTParty.get(url, headers: { 'User-Agent' => USER_AGENT })

      unless response.success?
        return report_failure("[#{account}] Could not fetch #{url} - error #{response.code} | headers #{response.headers}")
      end

      json = response.parsed_response

      unless json
        return report_failure("[#{account}] Could not extract JSON from #{url} - raw #{response.parsed_response} | headers #{response.headers}")
      end

      posts = extract_posts(json)

      if posts.empty?
        return report_failure("[#{account}] Could not find any posts, strange - raw #{response.parsed_response} | headers #{response.headers} | json #{json}")
      end

      posts.compact
    rescue StandardError => e
      report_failure("[#{account}] Could not fetch #{url} - #{e.class}: #{e.message}")
    end

    # Pulls the post nodes out of the profile JSON.
    #
    # @param json [Object] parsed response body
    # @return [Array] the `node` of every timeline edge, or `[]` when the
    #   payload does not have the expected structure
    def extract_posts(json)
      json['graphql']['user']['edge_owner_to_timeline_media']['edges'].map do |edge|
        edge['node']
      end
    rescue StandardError
      # Deliberate best effort: any unexpected shape counts as "no posts".
      []
    end

    private

    # Logs `message`, flags the run as failed and returns nil so callers can
    # write `return report_failure(...)`.
    def report_failure(message)
      error(message)
      memory['error'] = true
      nil
    end

    # Memoised lookup of Instagram post id => id of the event holding it,
    # built from all events of this agent.
    def instagramid_to_eventid
      @instagramid_to_eventid ||= events.all.map { |event| [event.payload['id'], event.id] }.to_h
    end

    # @return [Boolean] whether an event for this post already exists
    def seen_before?(post)
      instagramid_to_eventid.key?(post['id'])
    end

    # Replaces the payload of the event that already holds this post.
    def update_existing_event(post)
      event_id = instagramid_to_eventid.fetch(post['id'])
      event = events.find(event_id)
      event.payload = post
      # NOTE(review): `create_event` receives an already-persisted Event here;
      # presumably it re-saves the record - confirm against Agent#create_event.
      create_event(event)
    end

    # @return [Array<String>] the configured account names
    def all_accounts
      interpolated['accounts_to_monitor']
    end

    # @param refresh_every [Integer] minimum number of seconds between refreshes
    # @return [Array<String>] accounts never fetched, or fetched longer ago
    #   than `refresh_every` seconds
    def stale_accounts(refresh_every)
      cutoff = refresh_every.seconds.ago.to_i

      all_accounts.select do |account|
        last_fetched_at = memory.dig('last_fetched_at', account)
        last_fetched_at.nil? || last_fetched_at < cutoff
      end
    end

    # Chooses the accounts to hit during this run.
    #
    # With a positive `wait_between_refresh`, one randomly chosen stale account
    # (or none) is returned. With `0`, all accounts are returned. `0` is truthy
    # in Ruby, so the value has to be compared explicitly rather than used as a
    # condition.
    #
    # @return [Array<String>]
    def accounts_to_refresh
      refresh_every = interpolated['wait_between_refresh'].to_i
      return Array(all_accounts) unless refresh_every.positive?

      Array(stale_accounts(refresh_every).sample)
    end

    # Stores the current time as the last fetch attempt for `account`.
    def remember_fetching(account)
      memory['last_fetched_at'] ||= {}
      memory['last_fetched_at'][account] = Time.now.to_i
    end
  end
end
@@ -0,0 +1,4 @@
1
# Gem entry file: requires the huginn_agent library, then registers this gem's
# agent file (by path) through HuginnAgent.register.
+ require 'huginn_agent'
2
+
3
# The HuginnAgent.load line below is commented out in the source and never runs.
+ #HuginnAgent.load 'huginn_instagram_agent/concerns/my_agent_concern'
4
+ HuginnAgent.register 'huginn_instagram_agent/instagram_agent'
@@ -0,0 +1,13 @@
1
+ require 'rails_helper'
2
+ require 'huginn_agent/spec_helper'
3
+
4
# Spec skeleton for Agents::InstagramAgent. Before each example it builds an
# agent from the class's default options, assigns it a user and saves it.
# No real examples exist yet - only the `pending` placeholder at the bottom.
+ describe Agents::InstagramAgent do
5
+ before(:each) do
6
+ @valid_options = Agents::InstagramAgent.new.default_options
7
+ @checker = Agents::InstagramAgent.new(:name => "InstagramAgent", :options => @valid_options)
8
# NOTE(review): `users(:bob)` is presumably a fixture provided by the required spec helpers - confirm.
+ @checker.user = users(:bob)
9
+ @checker.save!
10
+ end
11
+
12
+ pending "add specs here"
13
+ end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: huginn_instagram_agent
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.4
5
+ platform: ruby
6
+ authors:
7
+ - Alessio Signorini
8
+ - Víctor A. Rodríguez
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2022-12-19 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.7'
21
+ type: :development
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '1.7'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '12.3'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '12.3'
42
+ - !ruby/object:Gem::Dependency
43
+ name: huginn_agent
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0.6'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0.6'
56
+ - !ruby/object:Gem::Dependency
57
+ name: httparty
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0.7'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0.7'
70
+ description:
71
+ email:
72
+ - alessio@signorini.us
73
+ - victor@bit-man.guru
74
+ executables: []
75
+ extensions: []
76
+ extra_rdoc_files: []
77
+ files:
78
+ - lib/huginn_instagram_agent.rb
79
+ - lib/huginn_instagram_agent/instagram_agent.rb
80
+ - spec/instagram_agent_spec.rb
81
+ homepage: https://github.com/alessio-signorini/huginn-instagram-agent
82
+ licenses:
83
+ - MIT
84
+ metadata: {}
85
+ post_install_message:
86
+ rdoc_options: []
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ required_rubygems_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ requirements: []
100
+ rubygems_version: 3.1.4
101
+ signing_key:
102
+ specification_version: 4
103
+ summary: Huginn Agent that monitors public Instagram accounts
104
+ test_files:
105
+ - spec/instagram_agent_spec.rb