huginn_instagram_agent 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 645f7e41f1ac78754c549774a4fda0aaee43e8ab8de1e549fba9dc16dde64ebf
4
+ data.tar.gz: abcb3c6662524c4d74b3d7a3e8a5e627a64c794f89f44547d9223934b385e770
5
+ SHA512:
6
+ metadata.gz: 4d52637e2616dccb2f056c7a980103b1a5311d769273cc5abf3223d826fb5fa88cedd02608d470568577f69abbd6c7f002d476018f7ad2eb2cbe254a3bea7c8f
7
+ data.tar.gz: e6287bf0fca1623de4147a10114fefbdadbf3ca6a17d6663bfaec5c8f4d3a8c2607752741effd116115e687ad760d6204f8d71c5f855c1390b0a9845fd1d8b90
@@ -0,0 +1,152 @@
1
+ module Agents
2
+ class InstagramAgent < Agent
3
+ can_dry_run!
4
+
5
+ default_schedule 'every_1h'
6
+
7
+ description <<-MD
8
+ Monitor public Instagram accounts and creates an event for each post.
9
+
10
+ It can be scheduled to hit Instagram as much as you want but will obey
11
+ the `wait_between_refresh` for each account to avoid being banned.
12
+ If set to `0` it will refresh all accounts at every run.
13
+
14
+ Links generally expire after 24 hours but this agent will try to keep the
15
+ corresponding events updated so they can be used in a feed.
16
+ MD
17
+
18
+
19
# Options a newly created agent starts with.
#
# @return [Hash] `:wait_between_refresh` set to 86400 and an empty
#   `:accounts_to_monitor` list
def default_options
  defaults = {}
  defaults[:wait_between_refresh] = 86400
  defaults[:accounts_to_monitor] = []
  defaults
end
25
+
26
+
27
# Validates the agent options and normalizes the account names.
#
# * `wait_between_refresh` defaults to 86400 when unset and must convert
#   (via `to_i`) to a value >= 0.
# * `accounts_to_monitor` must be an Array whose elements are all Strings;
#   leading `@` characters are stripped from every account name.
#
# Problems are reported through `errors.add`; nothing is raised for
# malformed input.
def validate_options
  options['wait_between_refresh'] ||= 86400
  unless options['wait_between_refresh'].to_i >= 0
    errors.add(:base, "`wait_between_refresh` must be an integer >=0")
  end

  accounts = options['accounts_to_monitor']
  unless accounts.is_a?(Array) && accounts.all? { |name| name.is_a?(String) }
    errors.add(:base, "`accounts_to_monitor` must be an array of strings")
    # Stop here: normalizing a non-array (or non-string entries) would raise.
    return
  end

  # Non-destructive `sub` also works when the configured strings are frozen;
  # `\A` anchors at the start of the string rather than at any line start.
  options['accounts_to_monitor'] = accounts.map { |name| name.sub(/\A@+/, '') }
end
34
+
35
+
36
# Reports whether the agent is considered healthy.
#
# @return [Boolean] false once `memory['error']` has been flagged, true
#   while it is unset (it is reset to nil at the start of `check`)
def working?
  !memory['error']
end
39
+
40
+
41
# Runs one refresh cycle: clears the error flag, fetches the posts of every
# account returned by `accounts_to_refresh`, and creates an event for each
# unseen post or updates the stored event for a post seen before.
def check
  memory['error'] = nil

  accounts_to_refresh.each do |account|
    # The attempt is recorded before fetching, so a failed fetch still
    # counts towards `wait_between_refresh` for this account.
    remember_fetching(account)

    posts = get_posts(account)
    # `get_posts` returns nil after logging a failure; move on to the next account.
    next unless posts

    posts.each do |post|
      if seen_before?(post)
        update_existing_event(post)
      else
        create_event :payload => post
      end
    end
  end
end
59
+
60
+
61
# Fetches the profile JSON of `account` from Instagram and returns its posts.
#
# Every failure (transport or parsing exception, non-success HTTP status,
# missing body, no posts found) is logged through `error`, flags
# `memory['error']`, and yields nil so the caller can skip the account.
#
# @param account [String] Instagram account name interpolated into the URL
# @return [Array, nil] the post nodes with nils removed, or nil on failure
def get_posts(account)
  url = "https://www.instagram.com/#{account}/?__a=1&__d=di"

  response = HTTParty.get(url,
    :headers => {
      'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'
    }
  )

  unless response.success?
    error("[#{account}] Could not fetch #{url} - error #{response.code} | headers #{response.headers}")
    memory['error'] = true
    return nil
  end

  json = response.parsed_response

  unless json
    error("[#{account}] Could not extract JSON from #{url} - raw #{response.parsed_response} | headers #{response.headers}")
    memory['error'] = true
    return nil
  end

  posts = extract_posts(json).compact

  if posts.empty?
    error("[#{account}] Could not find any posts, strange - raw #{response.parsed_response} | headers #{response.headers} | json #{json}")
    memory['error'] = true
    return nil
  end

  posts
rescue StandardError => e
  # Connection, timeout or body-parsing failures must not abort the whole
  # run; report them the same way as any other fetch failure.
  error("[#{account}] Could not fetch #{url} - #{e.class}: #{e.message}")
  memory['error'] = true
  nil
end
94
+
95
# Pulls the post nodes out of a parsed profile response.
#
# Reads `graphql -> user -> edge_owner_to_timeline_media -> edges` and
# collects each edge's `node`. Edges that are not hashes or carry no node
# are skipped instead of invalidating the whole list.
#
# @param json [Object] parsed response body; any shape is tolerated
# @return [Array] the nodes found, or an empty array when the expected
#   structure is missing
def extract_posts(json)
  edges = json['graphql']['user']['edge_owner_to_timeline_media']['edges']

  edges.select { |edge| edge.is_a?(Hash) }.map { |edge| edge['node'] }.compact
rescue StandardError
  # Any unexpected shape (nil, a String body, missing keys) means "no posts".
  []
end
103
+
104
+
105
+ private
106
+
107
+
108
# Memoized lookup from the `id` stored in each existing event's payload to
# that event's own id. Built from `events.all` on first use and then reused.
#
# @return [Hash] payload id => event id
def instagramid_to_eventid
  @instagramid_to_eventid ||= events.all.each_with_object({}) do |event, lookup|
    lookup[event.payload['id']] = event.id
  end
end
111
+
112
+
113
# Tells whether an event already exists for the given post.
#
# @param post [Hash] post node; its `'id'` is looked up
# @return [Boolean] true when `instagramid_to_eventid` contains the post id
def seen_before?(post)
  known_ids = instagramid_to_eventid
  known_ids.key?(post['id'])
end
116
+
117
+
118
# Replaces the payload of the event previously stored for this post and
# hands the event object to `create_event`.
#
# @param post [Hash] post node whose `'id'` must be present in
#   `instagramid_to_eventid` (a missing id raises KeyError via `fetch`)
# @return whatever `create_event` returns
def update_existing_event(post)
  existing = events.find(instagramid_to_eventid.fetch(post['id']))
  existing.payload = post
  create_event(existing)
end
124
+
125
+
126
# Every account configured in the interpolated `accounts_to_monitor` option.
#
# @return [Array] the configured accounts; an empty array when the option
#   is missing, so callers can iterate without a nil check
def all_accounts
  Array(interpolated['accounts_to_monitor'])
end
129
+
130
+
131
# Accounts that are due for a refresh.
#
# An account is stale when it has no entry under `memory['last_fetched_at']`
# or when its recorded timestamp is older than `refresh_every` seconds.
#
# @param refresh_every [Integer] minimum number of seconds between fetches
# @return [Array] the stale accounts, in `all_accounts` order
def stale_accounts(refresh_every)
  # Epoch-second cutoff, computed once and in the same unit that
  # `remember_fetching` stores (`Time.now.to_i`).
  cutoff = Time.now.to_i - refresh_every

  all_accounts.select do |account|
    last_fetched_at = memory.dig('last_fetched_at', account)
    last_fetched_at.nil? || last_fetched_at < cutoff
  end
end
137
+
138
+
139
# Chooses which accounts this run should fetch.
#
# With a positive `wait_between_refresh`, at most one account is returned:
# a randomly sampled entry of `stale_accounts`. With `0` (which is also
# what a missing option converts to via `to_i`) every configured account
# is returned.
#
# @return [Array] accounts to fetch, possibly empty
def accounts_to_refresh
  refresh_every = interpolated['wait_between_refresh'].to_i

  # An Integer is always truthy in Ruby, so "refresh everything" has to be
  # detected by value rather than by truthiness.
  return Array(all_accounts) unless refresh_every > 0

  Array(stale_accounts(refresh_every).sample)
end
144
+
145
+
146
# Stores the current epoch second as the last fetch time of `account`
# under `memory['last_fetched_at']`, creating that hash when absent.
#
# @param account [String] account name used as the key
# @return [Integer] the timestamp that was stored
def remember_fetching(account)
  timestamps = (memory['last_fetched_at'] ||= {})
  timestamps[account] = Time.now.to_i
end
150
+
151
+ end
152
+ end
@@ -0,0 +1,4 @@
1
require 'huginn_agent'

# Concern loading is left disabled.
#HuginnAgent.load 'huginn_instagram_agent/concerns/my_agent_concern'

# Register the agent file shipped with this gem.
HuginnAgent.register('huginn_instagram_agent/instagram_agent')
@@ -0,0 +1,13 @@
1
+ require 'rails_helper'
2
+ require 'huginn_agent/spec_helper'
3
+
4
describe Agents::InstagramAgent do
  # Builds and saves an agent with the default options, owned by the `bob`
  # user fixture, before every example.
  before(:each) do
    @valid_options = Agents::InstagramAgent.new.default_options
    @checker = Agents::InstagramAgent.new(:name => "InstagramAgent", :options => @valid_options)
    @checker.tap { |agent| agent.user = users(:bob) }.save!
  end

  # Placeholder: no examples have been written yet.
  pending "add specs here"
end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: huginn_instagram_agent
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.4
5
+ platform: ruby
6
+ authors:
7
+ - Alessio Signorini
8
+ - Víctor A. Rodríguez
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2022-12-19 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.7'
21
+ type: :development
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '1.7'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '12.3'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '12.3'
42
+ - !ruby/object:Gem::Dependency
43
+ name: huginn_agent
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0.6'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0.6'
56
+ - !ruby/object:Gem::Dependency
57
+ name: httparty
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0.7'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0.7'
70
+ description:
71
+ email:
72
+ - alessio@signorini.us
73
+ - victor@bit-man.guru
74
+ executables: []
75
+ extensions: []
76
+ extra_rdoc_files: []
77
+ files:
78
+ - lib/huginn_instagram_agent.rb
79
+ - lib/huginn_instagram_agent/instagram_agent.rb
80
+ - spec/instagram_agent_spec.rb
81
+ homepage: https://github.com/alessio-signorini/huginn-instagram-agent
82
+ licenses:
83
+ - MIT
84
+ metadata: {}
85
+ post_install_message:
86
+ rdoc_options: []
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ required_rubygems_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ requirements: []
100
+ rubygems_version: 3.1.4
101
+ signing_key:
102
+ specification_version: 4
103
+ summary: Huginn Agent that monitors public Instagram accounts
104
+ test_files:
105
+ - spec/instagram_agent_spec.rb