replacer_bot 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b1e8d3be70c7dfae8451a8a7e6655bb74bb545a9
|
4
|
+
data.tar.gz: 062f05a23f866b7eb168f95160b7dc5d07a4632d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 830a1b44dfc123d1ea276ef70deecd5f80276dd9900f43bc6d6c3d7024f87387b0b2acedd26197b028ba8d83509438bb354366bd555219fcf97aa5cf4725c04a
|
7
|
+
data.tar.gz: 1147ca7c0ec8a6dc588f68535d4fcd0d59f0d41d6844b87197a7e07a6df7a5a3b716a77be82506c4a3dd35b7b0c5e0443d9c4ac759f27d6644a0d16bbdc9c113
|
data/config/defaults.yml
CHANGED
@@ -3,13 +3,37 @@ module ReplacerBot
|
|
3
3
|
def self.validate tweet
|
4
4
|
archive = retrieve
|
5
5
|
t = sanitise tweet
|
6
|
-
valid = not(archive.include? t)
|
6
|
+
valid = not(archive.include? t) && not(similar_to_archive tweet, archive)
|
7
7
|
archive.add t
|
8
8
|
save archive
|
9
9
|
|
10
10
|
valid
|
11
11
|
end
|
12
12
|
|
13
|
+
def self.similar_to_archive tweet, archive
|
14
|
+
match = false
|
15
|
+
|
16
|
+
archive.each do |archived_tweet|
|
17
|
+
match = true if similar(tweet, archived_tweet)
|
18
|
+
end
|
19
|
+
|
20
|
+
match
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.similar tweet, other_tweet, weighting: Config.instance.config.similarity_weighting
|
24
|
+
tweet_words = tweet.split ' '
|
25
|
+
return false if tweet_words.count < weighting
|
26
|
+
|
27
|
+
match = false
|
28
|
+
|
29
|
+
(tweet_words.count - (weighting - 1)).times do |i|
|
30
|
+
sample = tweet_words[i, weighting].join(' ').downcase
|
31
|
+
match = true if sanitise(other_tweet.downcase).index sanitise(sample)
|
32
|
+
end
|
33
|
+
|
34
|
+
match
|
35
|
+
end
|
36
|
+
|
13
37
|
def self.retrieve
|
14
38
|
begin
|
15
39
|
Marshal.load File.open Config.instance.config.seen_tweets
|
data/lib/replacer_bot/version.rb
CHANGED
@@ -60,7 +60,7 @@ module ReplacerBot
|
|
60
60
|
|
61
61
|
it 'filters similar tweets', :vcr do
|
62
62
|
SeenTweets.validate 'How open data can help save lives http://t.co/90U7bVq5UF'
|
63
|
-
expect(replacer.tweets.count).to eq
|
63
|
+
expect(replacer.tweets.count).to eq 16
|
64
64
|
end
|
65
65
|
end
|
66
66
|
|
@@ -75,7 +75,7 @@ module ReplacerBot
|
|
75
75
|
end
|
76
76
|
|
77
77
|
it 'actually sends tweets', :vcr do
|
78
|
-
expect(replacer.client).to(receive(:update)).exactly(
|
78
|
+
expect(replacer.client).to(receive(:update)).exactly(16).times
|
79
79
|
interval = replacer.config.interval
|
80
80
|
replacer.config.interval = 0
|
81
81
|
replacer.tweet
|
@@ -84,6 +84,30 @@ module ReplacerBot
|
|
84
84
|
]
|
85
85
|
end
|
86
86
|
|
87
|
+
context 'overlap of words' do
|
88
|
+
# n is set in the default config, a lower value makes the bot less noisy at the risk of false negatives
|
89
|
+
it 'does not match on tweets with fewer than n words' do
|
90
|
+
expect(described_class.similar 'appears to match', 'You would think this appears to match').to eq false
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'sees tweets which overlap by at least n words as similar' do
|
94
|
+
expect(described_class.similar 'This is a string of words', 'Also this is a string of words innit').to eq true
|
95
|
+
expect(described_class.similar 'This is a string of words', 'Also this is a similar string similar words innit').to eq false
|
96
|
+
expect(described_class.similar 'This one will be a definite match ', 'So this one will be a definite match no doubt').to eq true
|
97
|
+
end
|
98
|
+
|
99
|
+
it 'deals sensibly with URLs and hashtags' do
|
100
|
+
expect(described_class.similar 'This one has a http://taylor.swift in it', 'So this one has a http://other.url/ in it here').to eq true
|
101
|
+
end
|
102
|
+
|
103
|
+
it 'works on real-world data' do
|
104
|
+
expect(described_class.
|
105
|
+
similar 'Netflix Releases Taylor Swift-Fetching Developer Preview: Netflix has released a developer preview of its in-house… bit.ly/1JfRdgA',
|
106
|
+
'Netflix Releases Taylor Swift-Fetching Developer Preview - Netflix has released a developer preview of its in-house d...'
|
107
|
+
).to eq true
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
87
111
|
it 'saves a set' do
|
88
112
|
set = Set.new [1, 2, 3]
|
89
113
|
described_class.save set
|