replacer_bot 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +13 -4
- data/Rakefile +1 -1
- data/config/defaults.yml +1 -0
- data/features/support/env.rb +3 -0
- data/lib/replacer_bot.rb +1 -0
- data/lib/replacer_bot/helpers.rb +11 -7
- data/lib/replacer_bot/replacer.rb +2 -2
- data/lib/replacer_bot/seen_tweets.rb +60 -0
- data/lib/replacer_bot/version.rb +1 -1
- data/spec/lib/replacer_bot/helpers_spec.rb +21 -11
- data/spec/lib/replacer_bot/replacer_spec.rb +13 -3
- data/spec/lib/replacer_bot/seen_tweets_spec.rb +96 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/vcr/ReplacerBot_Replacer/filtering_on_similar_tweets/filters_similar_tweets.yml +1248 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d9cc0084fcde5c232d3ffe37093ef1c95e1556e5
|
4
|
+
data.tar.gz: 0363fabbc0feb1e3a9b8a2fb08a06cf207120628
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 506d9982aa39ac4198556ed6b42bed26d6305f117a64bccb8df617b45eb74f311e3c259a46c237dc9bcbb5dd29ab98334fa5174d24b66c1e5d1545dc235e45a5
|
7
|
+
data.tar.gz: 0e9bacffd8e373d1e05c7082d90fbbdb7f0c662e8b49eeef00ff4a96f33a88316955036c6e366aaf70ea4b1ac7af830d04f955133723ceefcd2794a64d9a7065
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -27,7 +27,7 @@ or
|
|
27
27
|
|
28
28
|
## Configuration
|
29
29
|
|
30
|
-
The default config is [here](https://github.com/pikesley/replacer_bot/blob/master/config/defaults.yml), you'll want to create your own config at `~/.replacer_bot/config.yml` to override
|
30
|
+
The default config is [here](https://github.com/pikesley/replacer_bot/blob/master/config/defaults.yml), you'll want to create your own config at `~/.replacer_bot/config.yml` to override some of these, something like:
|
31
31
|
|
32
32
|
search_term: David Cameron
|
33
33
|
replacements:
|
@@ -35,9 +35,10 @@ The default config is [here](https://github.com/pikesley/replacer_bot/blob/maste
|
|
35
35
|
- cameron: Satan
|
36
36
|
save_file: /Users/sam/.replacer_bot/last.tweet
|
37
37
|
|
38
|
-
|
38
|
+
Notes:
|
39
39
|
|
40
40
|
* The search-and-replace terms will be applied in the order listed, which you may or may not care about
|
41
|
+
* The search part of the search-and-replace is case-insensitive
|
41
42
|
|
42
43
|
You'll also need some Twitter credentials, store them in `~/.replacer_botrc` like this:
|
43
44
|
|
@@ -50,11 +51,19 @@ You'll also need some Twitter credentials, store them in `~/.replacer_botrc` lik
|
|
50
51
|
|
51
52
|
## Running it
|
52
53
|
|
53
|
-
You should now be able to
|
54
|
+
You should now be able to run it like so:
|
54
55
|
|
55
56
|
➔ replacer tweet
|
56
57
|
Tweeting: Satan's Little Helper sets out academy 'vision' for every school http://t.co/S6yFWRf7pD
|
57
58
|
Sleeping 60 seconds
|
58
59
|
Tweeting: Swarm warning: Satan's Little Helper accuses migrants of 'breaking in' to UK http://t.co/1sB5J8Alwi
|
59
60
|
|
60
|
-
|
61
|
+
Notes:
|
62
|
+
|
63
|
+
* Direct replies and manual retweets are excluded
|
64
|
+
|
65
|
+
There's also
|
66
|
+
|
67
|
+
➔ replacer dry_run
|
68
|
+
|
69
|
+
which does the search and shows what it would have tweeted, without actually tweeting anything
|
data/Rakefile
CHANGED
data/config/defaults.yml
CHANGED
data/features/support/env.rb
CHANGED
data/lib/replacer_bot.rb
CHANGED
data/lib/replacer_bot/helpers.rb
CHANGED
@@ -1,8 +1,12 @@
|
|
1
1
|
module ReplacerBot
|
2
|
-
def self.encode term
|
2
|
+
def self.encode term:
|
3
3
|
URI.encode "\"#{term}\""
|
4
4
|
end
|
5
5
|
|
6
|
+
def self.is_hashtag word
|
7
|
+
word[0] == '#'
|
8
|
+
end
|
9
|
+
|
6
10
|
def self.last_tweet
|
7
11
|
begin
|
8
12
|
Marshal.load File.read Config.instance.config.save_file
|
@@ -13,23 +17,23 @@ module ReplacerBot
|
|
13
17
|
end
|
14
18
|
end
|
15
19
|
|
16
|
-
def self.validate string
|
20
|
+
def self.validate string:, term: Config.instance.config.search_term, ignore_spaces: true
|
17
21
|
return false if string[0...2] == 'RT'
|
18
22
|
return false if string[0] == '@'
|
19
23
|
|
20
24
|
term = term.gsub ' ', ' ?' if ignore_spaces
|
21
|
-
return true if string.index
|
25
|
+
return true if string.index(/#{term}/i) && SeenTweets.validate(string)
|
22
26
|
|
23
27
|
false
|
24
28
|
end
|
25
29
|
|
26
|
-
def self.filter list
|
27
|
-
list.select { |i| self.validate i.text, Config.instance.config.search_term, ignore_spaces }.
|
30
|
+
def self.filter list:, ignore_spaces: true
|
31
|
+
list.select { |i| self.validate string: i.text, term: Config.instance.config.search_term, ignore_spaces: ignore_spaces }.
|
28
32
|
select { |i| i.id > self.last_tweet}
|
29
33
|
end
|
30
34
|
|
31
35
|
def self.dehash word
|
32
|
-
if word
|
36
|
+
if is_hashtag word
|
33
37
|
return word[1..-1]
|
34
38
|
end
|
35
39
|
|
@@ -76,7 +80,7 @@ module ReplacerBot
|
|
76
80
|
]
|
77
81
|
end
|
78
82
|
|
79
|
-
def self.replace string
|
83
|
+
def self.replace string:, subs: Config.instance.config.replacements
|
80
84
|
# Something about a frozen string
|
81
85
|
our_string = string.dup
|
82
86
|
subs.each do |substitute|
|
data/lib/replacer_bot/replacer.rb
CHANGED
@@ -10,12 +10,12 @@ module ReplacerBot
|
|
10
10
|
|
11
11
|
def search #count = 20
|
12
12
|
@results ||= begin
|
13
|
-
results = ReplacerBot.filter @client.search(ReplacerBot.encode(@search_term), result_type: 'recent').take(@config.search_count), @config.ignore_spaces
|
13
|
+
results = ReplacerBot.filter list: @client.search(ReplacerBot.encode(term: @search_term), result_type: 'recent').take(@config.search_count), ignore_spaces: @config.ignore_spaces
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
17
|
def tweets
|
18
|
-
search.map { |r| ReplacerBot.truncate ReplacerBot.replace r.text }
|
18
|
+
search.map { |r| ReplacerBot.truncate ReplacerBot.replace string: r.text }
|
19
19
|
end
|
20
20
|
|
21
21
|
def tweet dry_run: false, chatty: false
|
data/lib/replacer_bot/seen_tweets.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
module ReplacerBot
|
2
|
+
class SeenTweets
|
3
|
+
def self.validate tweet
|
4
|
+
archive = retrieve
|
5
|
+
t = sanitise tweet
|
6
|
+
valid = not(archive.include? t)
|
7
|
+
archive.add t
|
8
|
+
save archive
|
9
|
+
|
10
|
+
valid
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.retrieve
|
14
|
+
begin
|
15
|
+
Marshal.load File.open Config.instance.config.seen_tweets
|
16
|
+
rescue Errno::ENOENT
|
17
|
+
Set.new
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def self.clean_urls string
|
22
|
+
string.gsub /https?:\/\/[^ ]*/, '__URL__'
|
23
|
+
end
|
24
|
+
|
25
|
+
def self.hashtag_nuker string:, other_end: false
|
26
|
+
words = string.split ' '
|
27
|
+
words.reverse! if other_end
|
28
|
+
|
29
|
+
no_hashtag_yet = false
|
30
|
+
|
31
|
+
a = []
|
32
|
+
words.each do |token|
|
33
|
+
unless ReplacerBot.is_hashtag token
|
34
|
+
no_hashtag_yet = true
|
35
|
+
end
|
36
|
+
|
37
|
+
if no_hashtag_yet
|
38
|
+
a.push token
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
a.reverse! if other_end
|
43
|
+
a.join ' '
|
44
|
+
end
|
45
|
+
|
46
|
+
def self.nuke_hashtags string
|
47
|
+
hashtag_nuker string: (hashtag_nuker string: string, other_end: true)
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.sanitise tweet
|
51
|
+
nuke_hashtags clean_urls tweet
|
52
|
+
end
|
53
|
+
|
54
|
+
def self.save set
|
55
|
+
File.open Config.instance.config.seen_tweets, 'w' do |file|
|
56
|
+
Marshal.dump set, file
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
data/lib/replacer_bot/version.rb
CHANGED
data/spec/lib/replacer_bot/helpers_spec.rb
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
module ReplacerBot
|
2
2
|
describe 'Helpers' do
|
3
|
+
after :each do
|
4
|
+
FileUtils.rm_f Config.instance.config.save_file
|
5
|
+
FileUtils.rm_f Config.instance.config.seen_tweets
|
6
|
+
end
|
7
|
+
|
3
8
|
context 'URLs' do
|
4
9
|
it 'URL-encodes a search term' do
|
5
|
-
expect(ReplacerBot.encode 'open data').to eq '%22open%20data%22'
|
10
|
+
expect(ReplacerBot.encode term: 'open data').to eq '%22open%20data%22'
|
6
11
|
end
|
7
12
|
end
|
8
13
|
|
@@ -22,23 +27,23 @@ module ReplacerBot
|
|
22
27
|
|
23
28
|
context 'filtering' do
|
24
29
|
it 'validates liberally' do
|
25
|
-
expect(ReplacerBot.validate 'opendata hulk', 'open data').to eq true
|
30
|
+
expect(ReplacerBot.validate string: 'opendata hulk', term: 'open data').to eq true
|
26
31
|
end
|
27
32
|
|
28
33
|
it 'validates more strictly' do
|
29
|
-
expect(ReplacerBot.validate 'open data ftw', 'open data', ignore_spaces
|
30
|
-
expect(ReplacerBot.validate 'i love opendata', 'open data', ignore_spaces
|
34
|
+
expect(ReplacerBot.validate string: 'open data ftw', term: 'open data', ignore_spaces: false).to eq true
|
35
|
+
expect(ReplacerBot.validate string: 'i love opendata', term: 'open data', ignore_spaces: false).to eq false
|
31
36
|
end
|
32
37
|
|
33
38
|
it 'validates away rubbish' do
|
34
|
-
expect(ReplacerBot.validate 'incredible hulk', 'open data').to eq false
|
39
|
+
expect(ReplacerBot.validate string: 'incredible hulk', term: 'open data').to eq false
|
35
40
|
end
|
36
41
|
|
37
42
|
it 'filters retweets' do
|
38
|
-
expect(ReplacerBot.validate 'RT @xyz This is about Open Data').to eq false
|
43
|
+
expect(ReplacerBot.validate string: 'RT @xyz This is about Open Data').to eq false
|
39
44
|
end
|
40
45
|
it 'filters direct replies' do
|
41
|
-
expect(ReplacerBot.validate '@abc This is a reply about Open Data').to eq false
|
46
|
+
expect(ReplacerBot.validate string: '@abc This is a reply about Open Data').to eq false
|
42
47
|
end
|
43
48
|
end
|
44
49
|
|
@@ -60,8 +65,8 @@ module ReplacerBot
|
|
60
65
|
end
|
61
66
|
|
62
67
|
it 'replaces text' do
|
63
|
-
expect(ReplacerBot.replace 'Something about Open Data goes here').to eq 'Something about Taylor Swift goes here'
|
64
|
-
expect(ReplacerBot.replace 'Something about #opendata http://foo.bar/').to eq 'Something about #TaylorSwift http://foo.bar/'
|
68
|
+
expect(ReplacerBot.replace string: 'Something about Open Data goes here').to eq 'Something about Taylor Swift goes here'
|
69
|
+
expect(ReplacerBot.replace string: 'Something about #opendata http://foo.bar/').to eq 'Something about #TaylorSwift http://foo.bar/'
|
65
70
|
end
|
66
71
|
|
67
72
|
it 'does a/an correctly' do
|
@@ -80,8 +85,13 @@ module ReplacerBot
|
|
80
85
|
end
|
81
86
|
|
82
87
|
it 'uses the correct article in replacements' do
|
83
|
-
expect(ReplacerBot.replace 'This is an Open Data tweet').to eq 'This is a Taylor Swift tweet'
|
84
|
-
expect(ReplacerBot.replace 'This is an Open Data tweet about an #opendata story').to eq 'This is a Taylor Swift tweet about a #TaylorSwift story'
|
88
|
+
expect(ReplacerBot.replace string: 'This is an Open Data tweet').to eq 'This is a Taylor Swift tweet'
|
89
|
+
expect(ReplacerBot.replace string: 'This is an Open Data tweet about an #opendata story').to eq 'This is a Taylor Swift tweet about a #TaylorSwift story'
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'recognises a hashtag' do
|
93
|
+
expect(ReplacerBot.is_hashtag '#hashtag').to eq true
|
94
|
+
expect(ReplacerBot.is_hashtag 'not_hashtag').to eq false
|
85
95
|
end
|
86
96
|
end
|
87
97
|
end
|
data/spec/lib/replacer_bot/replacer_spec.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
module ReplacerBot
|
2
2
|
describe Replacer do
|
3
3
|
after :each do
|
4
|
-
FileUtils.rm_f
|
4
|
+
FileUtils.rm_f Config.instance.config.save_file
|
5
|
+
FileUtils.rm_f Config.instance.config.seen_tweets
|
5
6
|
end
|
6
7
|
|
7
8
|
context 'search' do
|
@@ -54,18 +55,27 @@ module ReplacerBot
|
|
54
55
|
end
|
55
56
|
end
|
56
57
|
|
58
|
+
context 'filtering on similar tweets' do
|
59
|
+
let(:replacer) { described_class.new }
|
60
|
+
|
61
|
+
it 'filters similar tweets', :vcr do
|
62
|
+
SeenTweets.validate 'How open data can help save lives http://t.co/90U7bVq5UF'
|
63
|
+
expect(replacer.tweets.count).to eq 19
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
57
67
|
context 'tweet' do
|
58
68
|
let(:replacer) { described_class.new }
|
59
69
|
|
60
70
|
it 'prepares sensible tweets', :vcr do
|
61
71
|
expect(replacer.tweets).to be_a Array
|
62
72
|
expect(replacer.tweets.first).to eq 'Taylor Swift Hackathon 6-7 октября'
|
63
|
-
expect(replacer.tweets[
|
73
|
+
expect(replacer.tweets[10]).to eq 'Lovely: "Does Taylor Swift Build Trust?" by @denicewross https://t.co/zcuOX6O8pA'
|
64
74
|
expect(replacer.tweets.all? { |t| t.length <= 140} ).to eq true
|
65
75
|
end
|
66
76
|
|
67
77
|
it 'actually sends tweets', :vcr do
|
68
|
-
expect(replacer.client).to(receive(:update)).exactly(
|
78
|
+
expect(replacer.client).to(receive(:update)).exactly(18).times
|
69
79
|
interval = replacer.config.interval
|
70
80
|
replacer.config.interval = 0
|
71
81
|
replacer.tweet
|
data/spec/lib/replacer_bot/seen_tweets_spec.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
module ReplacerBot
|
2
|
+
describe SeenTweets do
|
3
|
+
after :each do
|
4
|
+
FileUtils.rm_f Config.instance.config.seen_tweets
|
5
|
+
end
|
6
|
+
|
7
|
+
context 'sanitise' do
|
8
|
+
it 'blanks out URLs' do
|
9
|
+
expect(described_class.clean_urls 'Some text with http://foo.bar/ in it').to eq 'Some text with __URL__ in it'
|
10
|
+
expect(described_class.clean_urls 'Other text with https://foo.bar/?123 and http://example.com/derp#fragment in it').to eq 'Other text with __URL__ and __URL__ in it'
|
11
|
+
expect(described_class.clean_urls 'Some text without any URLs in it').to eq 'Some text without any URLs in it'
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'removes hashtags from the end of text' do
|
15
|
+
expect(described_class.nuke_hashtags 'Text finishing with a #hashtag').to eq 'Text finishing with a'
|
16
|
+
expect(described_class.nuke_hashtags 'This embedded #hashtag should survive but not this one #spurious').
|
17
|
+
to eq 'This embedded #hashtag should survive but not this one'
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'removes hashtags from the beginning of text' do
|
21
|
+
expect(described_class.nuke_hashtags '#Beginning hashtag should go away').to eq 'hashtag should go away'
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'strips hashtags at either end but leaves embedded ones' do
|
25
|
+
expect(described_class.nuke_hashtags '#This #will go away #but then #also #these').
|
26
|
+
to eq 'go away #but then'
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'returns nothing if all it gets is hashtags' do
|
30
|
+
expect(described_class.nuke_hashtags '#nothing #but #hashtags').to eq ''
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'sanitises tweets' do
|
34
|
+
expect(described_class.sanitise '#Hashtag at the start with http://derp.com/thing #this and also #these').
|
35
|
+
to eq 'at the start with __URL__ #this and also'
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'validates the first tweet' do
|
40
|
+
expect(described_class.validate 'This is a tweet').to eq true
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'invalidates on seeing the same tweet again' do
|
44
|
+
described_class.validate 'This is a tweet'
|
45
|
+
expect(described_class.validate 'This is a tweet').to eq false
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'invalidates similar tweets with different URLs' do
|
49
|
+
described_class.validate 'This is a tweet with https://foo.bar/abcd'
|
50
|
+
expect(described_class.validate 'This is a tweet with https://foo.bar/xyz').to be false
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'invalidates similar tweets laden with hashtags' do
|
54
|
+
described_class.validate 'This is a tweet'
|
55
|
+
expect(described_class.validate 'This is a tweet #loaded #with #hashtags').to be false
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'validates and invalidates correctly' do
|
59
|
+
corpus = [
|
60
|
+
'This is a tweet with #hashtag https://derp.com/abc #trailing #tags',
|
61
|
+
'This is a different tweet',
|
62
|
+
'#Needless #hashtags tacked on to #this tweet'
|
63
|
+
]
|
64
|
+
corpus.each do |tweet|
|
65
|
+
described_class.validate tweet
|
66
|
+
end
|
67
|
+
|
68
|
+
test_cases = {
|
69
|
+
'This one should be fine' => true,
|
70
|
+
'This is a different tweet #with #hashtags' => false,
|
71
|
+
'#Different #tags tacked on to #this tweet #here' => false,
|
72
|
+
'This is a tweet with #hashtag http://what.even/' => false,
|
73
|
+
'This is a tweet with #hashtag http://what.even/xyz #derp' => false
|
74
|
+
}
|
75
|
+
test_cases.each_pair do |tweet, expectation|
|
76
|
+
expect(described_class.validate tweet).to eq expectation
|
77
|
+
end
|
78
|
+
|
79
|
+
expect(described_class.retrieve.to_a.sort).to eq [
|
80
|
+
"This is a different tweet",
|
81
|
+
"This is a tweet with #hashtag __URL__",
|
82
|
+
"This one should be fine",
|
83
|
+
"tacked on to #this tweet"
|
84
|
+
]
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'saves a set' do
|
88
|
+
set = Set.new [1, 2, 3]
|
89
|
+
described_class.save set
|
90
|
+
|
91
|
+
expect(Marshal.load File.open Config.instance.config.seen_tweets).to be_a Set
|
92
|
+
expect(Marshal.load File.open Config.instance.config.seen_tweets).to include 1
|
93
|
+
expect(Marshal.load File.open Config.instance.config.seen_tweets).to include 3
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|