replacer_bot 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +13 -4
- data/Rakefile +1 -1
- data/config/defaults.yml +1 -0
- data/features/support/env.rb +3 -0
- data/lib/replacer_bot.rb +1 -0
- data/lib/replacer_bot/helpers.rb +11 -7
- data/lib/replacer_bot/replacer.rb +2 -2
- data/lib/replacer_bot/seen_tweets.rb +60 -0
- data/lib/replacer_bot/version.rb +1 -1
- data/spec/lib/replacer_bot/helpers_spec.rb +21 -11
- data/spec/lib/replacer_bot/replacer_spec.rb +13 -3
- data/spec/lib/replacer_bot/seen_tweets_spec.rb +96 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/vcr/ReplacerBot_Replacer/filtering_on_similar_tweets/filters_similar_tweets.yml +1248 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d9cc0084fcde5c232d3ffe37093ef1c95e1556e5
|
4
|
+
data.tar.gz: 0363fabbc0feb1e3a9b8a2fb08a06cf207120628
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 506d9982aa39ac4198556ed6b42bed26d6305f117a64bccb8df617b45eb74f311e3c259a46c237dc9bcbb5dd29ab98334fa5174d24b66c1e5d1545dc235e45a5
|
7
|
+
data.tar.gz: 0e9bacffd8e373d1e05c7082d90fbbdb7f0c662e8b49eeef00ff4a96f33a88316955036c6e366aaf70ea4b1ac7af830d04f955133723ceefcd2794a64d9a7065
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -27,7 +27,7 @@ or
|
|
27
27
|
|
28
28
|
## Configuration
|
29
29
|
|
30
|
-
The default config is [here](https://github.com/pikesley/replacer_bot/blob/master/config/defaults.yml), you'll want to create your own config at `~/.replacer_bot/config.yml` to override
|
30
|
+
The default config is [here](https://github.com/pikesley/replacer_bot/blob/master/config/defaults.yml), you'll want to create your own config at `~/.replacer_bot/config.yml` to override some of these, something like:
|
31
31
|
|
32
32
|
search_term: David Cameron
|
33
33
|
replacements:
|
@@ -35,9 +35,10 @@ The default config is [here](https://github.com/pikesley/replacer_bot/blob/maste
|
|
35
35
|
- cameron: Satan
|
36
36
|
save_file: /Users/sam/.replacer_bot/last.tweet
|
37
37
|
|
38
|
-
|
38
|
+
Notes:
|
39
39
|
|
40
40
|
* The search-and-replace terms will be applied in the order listed, which you may or may not care about
|
41
|
+
* The search part of the search-and-replace is case-insensitive
|
41
42
|
|
42
43
|
You'll also need some Twitter credentials, store them in `~/.replacer_botrc` like this:
|
43
44
|
|
@@ -50,11 +51,19 @@ You'll also need some Twitter credentials, store them in `~/.replacer_botrc` lik
|
|
50
51
|
|
51
52
|
## Running it
|
52
53
|
|
53
|
-
You should now be able to
|
54
|
+
You should now be able to run it like so:
|
54
55
|
|
55
56
|
➔ replacer tweet
|
56
57
|
Tweeting: Satan's Little Helper sets out academy 'vision' for every school http://t.co/S6yFWRf7pD
|
57
58
|
Sleeping 60 seconds
|
58
59
|
Tweeting: Swarm warning: Satan's Little Helper accuses migrants of 'breaking in' to UK http://t.co/1sB5J8Alwi
|
59
60
|
|
60
|
-
|
61
|
+
Notes:
|
62
|
+
|
63
|
+
* Direct replies and manual retweets are excluded
|
64
|
+
|
65
|
+
There's also
|
66
|
+
|
67
|
+
➔ replacer dry_run
|
68
|
+
|
69
|
+
which does the search and shows what it would have tweeted, without actually tweeting anything
|
data/Rakefile
CHANGED
data/config/defaults.yml
CHANGED
data/features/support/env.rb
CHANGED
data/lib/replacer_bot.rb
CHANGED
data/lib/replacer_bot/helpers.rb
CHANGED
@@ -1,8 +1,12 @@
|
|
1
1
|
module ReplacerBot
|
2
|
-
def self.encode term
|
2
|
+
def self.encode term:
|
3
3
|
URI.encode "\"#{term}\""
|
4
4
|
end
|
5
5
|
|
6
|
+
def self.is_hashtag word
|
7
|
+
word[0] == '#'
|
8
|
+
end
|
9
|
+
|
6
10
|
def self.last_tweet
|
7
11
|
begin
|
8
12
|
Marshal.load File.read Config.instance.config.save_file
|
@@ -13,23 +17,23 @@ module ReplacerBot
|
|
13
17
|
end
|
14
18
|
end
|
15
19
|
|
16
|
-
def self.validate string
|
20
|
+
def self.validate string:, term: Config.instance.config.search_term, ignore_spaces: true
|
17
21
|
return false if string[0...2] == 'RT'
|
18
22
|
return false if string[0] == '@'
|
19
23
|
|
20
24
|
term = term.gsub ' ', ' ?' if ignore_spaces
|
21
|
-
return true if string.index
|
25
|
+
return true if string.index(/#{term}/i) && SeenTweets.validate(string)
|
22
26
|
|
23
27
|
false
|
24
28
|
end
|
25
29
|
|
26
|
-
def self.filter list
|
27
|
-
list.select { |i| self.validate i.text, Config.instance.config.search_term, ignore_spaces }.
|
30
|
+
def self.filter list:, ignore_spaces: true
|
31
|
+
list.select { |i| self.validate string: i.text, term: Config.instance.config.search_term, ignore_spaces: ignore_spaces }.
|
28
32
|
select { |i| i.id > self.last_tweet}
|
29
33
|
end
|
30
34
|
|
31
35
|
def self.dehash word
|
32
|
-
if word
|
36
|
+
if is_hashtag word
|
33
37
|
return word[1..-1]
|
34
38
|
end
|
35
39
|
|
@@ -76,7 +80,7 @@ module ReplacerBot
|
|
76
80
|
]
|
77
81
|
end
|
78
82
|
|
79
|
-
def self.replace string
|
83
|
+
def self.replace string:, subs: Config.instance.config.replacements
|
80
84
|
# Something about a frozen string
|
81
85
|
our_string = string.dup
|
82
86
|
subs.each do |substitute|
|
@@ -10,12 +10,12 @@ module ReplacerBot
|
|
10
10
|
|
11
11
|
def search #count = 20
|
12
12
|
@results ||= begin
|
13
|
-
results = ReplacerBot.filter @client.search(ReplacerBot.encode(@search_term), result_type: 'recent').take(@config.search_count), @config.ignore_spaces
|
13
|
+
results = ReplacerBot.filter list: @client.search(ReplacerBot.encode(term: @search_term), result_type: 'recent').take(@config.search_count), ignore_spaces: @config.ignore_spaces
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
17
|
def tweets
|
18
|
-
search.map { |r| ReplacerBot.truncate ReplacerBot.replace r.text }
|
18
|
+
search.map { |r| ReplacerBot.truncate ReplacerBot.replace string: r.text }
|
19
19
|
end
|
20
20
|
|
21
21
|
def tweet dry_run: false, chatty: false
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module ReplacerBot
|
2
|
+
class SeenTweets
|
3
|
+
def self.validate tweet
|
4
|
+
archive = retrieve
|
5
|
+
t = sanitise tweet
|
6
|
+
valid = not(archive.include? t)
|
7
|
+
archive.add t
|
8
|
+
save archive
|
9
|
+
|
10
|
+
valid
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.retrieve
|
14
|
+
begin
|
15
|
+
Marshal.load File.open Config.instance.config.seen_tweets
|
16
|
+
rescue Errno::ENOENT
|
17
|
+
Set.new
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def self.clean_urls string
|
22
|
+
string.gsub /https?:\/\/[^ ]*/, '__URL__'
|
23
|
+
end
|
24
|
+
|
25
|
+
def self.hashtag_nuker string:, other_end: false
|
26
|
+
words = string.split ' '
|
27
|
+
words.reverse! if other_end
|
28
|
+
|
29
|
+
no_hashtag_yet = false
|
30
|
+
|
31
|
+
a = []
|
32
|
+
words.each do |token|
|
33
|
+
unless ReplacerBot.is_hashtag token
|
34
|
+
no_hashtag_yet = true
|
35
|
+
end
|
36
|
+
|
37
|
+
if no_hashtag_yet
|
38
|
+
a.push token
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
a.reverse! if other_end
|
43
|
+
a.join ' '
|
44
|
+
end
|
45
|
+
|
46
|
+
def self.nuke_hashtags string
|
47
|
+
hashtag_nuker string: (hashtag_nuker string: string, other_end: true)
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.sanitise tweet
|
51
|
+
nuke_hashtags clean_urls tweet
|
52
|
+
end
|
53
|
+
|
54
|
+
def self.save set
|
55
|
+
File.open Config.instance.config.seen_tweets, 'w' do |file|
|
56
|
+
Marshal.dump set, file
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
data/lib/replacer_bot/version.rb
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
module ReplacerBot
|
2
2
|
describe 'Helpers' do
|
3
|
+
after :each do
|
4
|
+
FileUtils.rm_f Config.instance.config.save_file
|
5
|
+
FileUtils.rm_f Config.instance.config.seen_tweets
|
6
|
+
end
|
7
|
+
|
3
8
|
context 'URLs' do
|
4
9
|
it 'URL-encodes a search term' do
|
5
|
-
expect(ReplacerBot.encode 'open data').to eq '%22open%20data%22'
|
10
|
+
expect(ReplacerBot.encode term: 'open data').to eq '%22open%20data%22'
|
6
11
|
end
|
7
12
|
end
|
8
13
|
|
@@ -22,23 +27,23 @@ module ReplacerBot
|
|
22
27
|
|
23
28
|
context 'filtering' do
|
24
29
|
it 'validates liberally' do
|
25
|
-
expect(ReplacerBot.validate 'opendata hulk', 'open data').to eq true
|
30
|
+
expect(ReplacerBot.validate string: 'opendata hulk', term: 'open data').to eq true
|
26
31
|
end
|
27
32
|
|
28
33
|
it 'validates more strictly' do
|
29
|
-
expect(ReplacerBot.validate 'open data ftw', 'open data', ignore_spaces
|
30
|
-
expect(ReplacerBot.validate 'i love opendata', 'open data', ignore_spaces
|
34
|
+
expect(ReplacerBot.validate string: 'open data ftw', term: 'open data', ignore_spaces: false).to eq true
|
35
|
+
expect(ReplacerBot.validate string: 'i love opendata', term: 'open data', ignore_spaces: false).to eq false
|
31
36
|
end
|
32
37
|
|
33
38
|
it 'validates away rubbish' do
|
34
|
-
expect(ReplacerBot.validate 'incredible hulk', 'open data').to eq false
|
39
|
+
expect(ReplacerBot.validate string: 'incredible hulk', term: 'open data').to eq false
|
35
40
|
end
|
36
41
|
|
37
42
|
it 'filters retweets' do
|
38
|
-
expect(ReplacerBot.validate 'RT @xyz This is about Open Data').to eq false
|
43
|
+
expect(ReplacerBot.validate string: 'RT @xyz This is about Open Data').to eq false
|
39
44
|
end
|
40
45
|
it 'filters direct replies' do
|
41
|
-
expect(ReplacerBot.validate '@abc This is a reply about Open Data').to eq false
|
46
|
+
expect(ReplacerBot.validate string: '@abc This is a reply about Open Data').to eq false
|
42
47
|
end
|
43
48
|
end
|
44
49
|
|
@@ -60,8 +65,8 @@ module ReplacerBot
|
|
60
65
|
end
|
61
66
|
|
62
67
|
it 'replaces text' do
|
63
|
-
expect(ReplacerBot.replace 'Something about Open Data goes here').to eq 'Something about Taylor Swift goes here'
|
64
|
-
expect(ReplacerBot.replace 'Something about #opendata http://foo.bar/').to eq 'Something about #TaylorSwift http://foo.bar/'
|
68
|
+
expect(ReplacerBot.replace string: 'Something about Open Data goes here').to eq 'Something about Taylor Swift goes here'
|
69
|
+
expect(ReplacerBot.replace string: 'Something about #opendata http://foo.bar/').to eq 'Something about #TaylorSwift http://foo.bar/'
|
65
70
|
end
|
66
71
|
|
67
72
|
it 'does a/an correctly' do
|
@@ -80,8 +85,13 @@ module ReplacerBot
|
|
80
85
|
end
|
81
86
|
|
82
87
|
it 'uses the correct article in replacements' do
|
83
|
-
expect(ReplacerBot.replace 'This is an Open Data tweet').to eq 'This is a Taylor Swift tweet'
|
84
|
-
expect(ReplacerBot.replace 'This is an Open Data tweet about an #opendata story').to eq 'This is a Taylor Swift tweet about a #TaylorSwift story'
|
88
|
+
expect(ReplacerBot.replace string: 'This is an Open Data tweet').to eq 'This is a Taylor Swift tweet'
|
89
|
+
expect(ReplacerBot.replace string: 'This is an Open Data tweet about an #opendata story').to eq 'This is a Taylor Swift tweet about a #TaylorSwift story'
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'recognises a hashtag' do
|
93
|
+
expect(ReplacerBot.is_hashtag '#hashtag').to eq true
|
94
|
+
expect(ReplacerBot.is_hashtag 'not_hashtag').to eq false
|
85
95
|
end
|
86
96
|
end
|
87
97
|
end
|
@@ -1,7 +1,8 @@
|
|
1
1
|
module ReplacerBot
|
2
2
|
describe Replacer do
|
3
3
|
after :each do
|
4
|
-
FileUtils.rm_f
|
4
|
+
FileUtils.rm_f Config.instance.config.save_file
|
5
|
+
FileUtils.rm_f Config.instance.config.seen_tweets
|
5
6
|
end
|
6
7
|
|
7
8
|
context 'search' do
|
@@ -54,18 +55,27 @@ module ReplacerBot
|
|
54
55
|
end
|
55
56
|
end
|
56
57
|
|
58
|
+
context 'filtering on similar tweets' do
|
59
|
+
let(:replacer) { described_class.new }
|
60
|
+
|
61
|
+
it 'filters similar tweets', :vcr do
|
62
|
+
SeenTweets.validate 'How open data can help save lives http://t.co/90U7bVq5UF'
|
63
|
+
expect(replacer.tweets.count).to eq 19
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
57
67
|
context 'tweet' do
|
58
68
|
let(:replacer) { described_class.new }
|
59
69
|
|
60
70
|
it 'prepares sensible tweets', :vcr do
|
61
71
|
expect(replacer.tweets).to be_a Array
|
62
72
|
expect(replacer.tweets.first).to eq 'Taylor Swift Hackathon 6-7 октября'
|
63
|
-
expect(replacer.tweets[
|
73
|
+
expect(replacer.tweets[10]).to eq 'Lovely: "Does Taylor Swift Build Trust?" by @denicewross https://t.co/zcuOX6O8pA'
|
64
74
|
expect(replacer.tweets.all? { |t| t.length <= 140} ).to eq true
|
65
75
|
end
|
66
76
|
|
67
77
|
it 'actually sends tweets', :vcr do
|
68
|
-
expect(replacer.client).to(receive(:update)).exactly(
|
78
|
+
expect(replacer.client).to(receive(:update)).exactly(18).times
|
69
79
|
interval = replacer.config.interval
|
70
80
|
replacer.config.interval = 0
|
71
81
|
replacer.tweet
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module ReplacerBot
|
2
|
+
describe SeenTweets do
|
3
|
+
after :each do
|
4
|
+
FileUtils.rm_f Config.instance.config.seen_tweets
|
5
|
+
end
|
6
|
+
|
7
|
+
context 'sanitise' do
|
8
|
+
it 'blanks out URLs' do
|
9
|
+
expect(described_class.clean_urls 'Some text with http://foo.bar/ in it').to eq 'Some text with __URL__ in it'
|
10
|
+
expect(described_class.clean_urls 'Other text with https://foo.bar/?123 and http://example.com/derp#fragment in it').to eq 'Other text with __URL__ and __URL__ in it'
|
11
|
+
expect(described_class.clean_urls 'Some text without any URLs in it').to eq 'Some text without any URLs in it'
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'removes hashtags from the end of text' do
|
15
|
+
expect(described_class.nuke_hashtags 'Text finishing with a #hashtag').to eq 'Text finishing with a'
|
16
|
+
expect(described_class.nuke_hashtags 'This embedded #hashtag should survive but not this one #spurious').
|
17
|
+
to eq 'This embedded #hashtag should survive but not this one'
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'removes hashtags from the beginning of text' do
|
21
|
+
expect(described_class.nuke_hashtags '#Beginning hashtag should go away').to eq 'hashtag should go away'
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'strips hashtags at either end but leaves embedded ones' do
|
25
|
+
expect(described_class.nuke_hashtags '#This #will go away #but then #also #these').
|
26
|
+
to eq 'go away #but then'
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'returns nothing if all it gets is hashtags' do
|
30
|
+
expect(described_class.nuke_hashtags '#nothing #but #hashtags').to eq ''
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'sanitises tweets' do
|
34
|
+
expect(described_class.sanitise '#Hashtag at the start with http://derp.com/thing #this and also #these').
|
35
|
+
to eq 'at the start with __URL__ #this and also'
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'validates the first tweet' do
|
40
|
+
expect(described_class.validate 'This is a tweet').to eq true
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'invalidates on seeing the same tweet again' do
|
44
|
+
described_class.validate 'This is a tweet'
|
45
|
+
expect(described_class.validate 'This is a tweet').to eq false
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'invalidates similar tweets with different URLs' do
|
49
|
+
described_class.validate 'This is a tweet with https://foo.bar/abcd'
|
50
|
+
expect(described_class.validate 'This is a tweet with https://foo.bar/xyz').to be false
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'invalidates similar tweets laden with hashtags' do
|
54
|
+
described_class.validate 'This is a tweet'
|
55
|
+
expect(described_class.validate 'This is a tweet #loaded #with #hashtags').to be false
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'validates and invalidates correctly' do
|
59
|
+
corpus = [
|
60
|
+
'This is a tweet with #hashtag https://derp.com/abc #trailing #tags',
|
61
|
+
'This is a different tweet',
|
62
|
+
'#Needless #hashtags tacked on to #this tweet'
|
63
|
+
]
|
64
|
+
corpus.each do |tweet|
|
65
|
+
described_class.validate tweet
|
66
|
+
end
|
67
|
+
|
68
|
+
test_cases = {
|
69
|
+
'This one should be fine' => true,
|
70
|
+
'This is a different tweet #with #hashtags' => false,
|
71
|
+
'#Different #tags tacked on to #this tweet #here' => false,
|
72
|
+
'This is a tweet with #hashtag http://what.even/' => false,
|
73
|
+
'This is a tweet with #hashtag http://what.even/xyz #derp' => false
|
74
|
+
}
|
75
|
+
test_cases.each_pair do |tweet, expectation|
|
76
|
+
expect(described_class.validate tweet).to eq expectation
|
77
|
+
end
|
78
|
+
|
79
|
+
expect(described_class.retrieve.to_a.sort).to eq [
|
80
|
+
"This is a different tweet",
|
81
|
+
"This is a tweet with #hashtag __URL__",
|
82
|
+
"This one should be fine",
|
83
|
+
"tacked on to #this tweet"
|
84
|
+
]
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'saves a set' do
|
88
|
+
set = Set.new [1, 2, 3]
|
89
|
+
described_class.save set
|
90
|
+
|
91
|
+
expect(Marshal.load File.open Config.instance.config.seen_tweets).to be_a Set
|
92
|
+
expect(Marshal.load File.open Config.instance.config.seen_tweets).to include 1
|
93
|
+
expect(Marshal.load File.open Config.instance.config.seen_tweets).to include 3
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|