replacer_bot 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b1e8d3be70c7dfae8451a8a7e6655bb74bb545a9
|
4
|
+
data.tar.gz: 062f05a23f866b7eb168f95160b7dc5d07a4632d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 830a1b44dfc123d1ea276ef70deecd5f80276dd9900f43bc6d6c3d7024f87387b0b2acedd26197b028ba8d83509438bb354366bd555219fcf97aa5cf4725c04a
|
7
|
+
data.tar.gz: 1147ca7c0ec8a6dc588f68535d4fcd0d59f0d41d6844b87197a7e07a6df7a5a3b716a77be82506c4a3dd35b7b0c5e0443d9c4ac759f27d6644a0d16bbdc9c113
|
data/config/defaults.yml
CHANGED
@@ -3,13 +3,37 @@ module ReplacerBot
|
|
3
3
|
def self.validate tweet
|
4
4
|
archive = retrieve
|
5
5
|
t = sanitise tweet
|
6
|
-
valid = not(archive.include? t)
|
6
|
+
valid = not(archive.include? t) && not(similar_to_archive tweet, archive)
|
7
7
|
archive.add t
|
8
8
|
save archive
|
9
9
|
|
10
10
|
valid
|
11
11
|
end
|
12
12
|
|
13
|
+
def self.similar_to_archive tweet, archive
|
14
|
+
match = false
|
15
|
+
|
16
|
+
archive.each do |archived_tweet|
|
17
|
+
match = true if similar(tweet, archived_tweet)
|
18
|
+
end
|
19
|
+
|
20
|
+
match
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.similar tweet, other_tweet, weighting: Config.instance.config.similarity_weighting
|
24
|
+
tweet_words = tweet.split ' '
|
25
|
+
return false if tweet_words.count < weighting
|
26
|
+
|
27
|
+
match = false
|
28
|
+
|
29
|
+
(tweet_words.count - (weighting - 1)).times do |i|
|
30
|
+
sample = tweet_words[i, weighting].join(' ').downcase
|
31
|
+
match = true if sanitise(other_tweet.downcase).index sanitise(sample)
|
32
|
+
end
|
33
|
+
|
34
|
+
match
|
35
|
+
end
|
36
|
+
|
13
37
|
def self.retrieve
|
14
38
|
begin
|
15
39
|
Marshal.load File.open Config.instance.config.seen_tweets
|
data/lib/replacer_bot/version.rb
CHANGED
@@ -60,7 +60,7 @@ module ReplacerBot
|
|
60
60
|
|
61
61
|
it 'filters similar tweets', :vcr do
|
62
62
|
SeenTweets.validate 'How open data can help save lives http://t.co/90U7bVq5UF'
|
63
|
-
expect(replacer.tweets.count).to eq
|
63
|
+
expect(replacer.tweets.count).to eq 16
|
64
64
|
end
|
65
65
|
end
|
66
66
|
|
@@ -75,7 +75,7 @@ module ReplacerBot
|
|
75
75
|
end
|
76
76
|
|
77
77
|
it 'actually sends tweets', :vcr do
|
78
|
-
expect(replacer.client).to(receive(:update)).exactly(
|
78
|
+
expect(replacer.client).to(receive(:update)).exactly(16).times
|
79
79
|
interval = replacer.config.interval
|
80
80
|
replacer.config.interval = 0
|
81
81
|
replacer.tweet
|
@@ -84,6 +84,30 @@ module ReplacerBot
|
|
84
84
|
]
|
85
85
|
end
|
86
86
|
|
87
|
+
context 'overlap of words' do
|
88
|
+
# n is set in the default config, a lower value makes the bot less noisy at the risk of false negatives
|
89
|
+
it 'does not match on tweets with fewer than n words' do
|
90
|
+
expect(described_class.similar 'appears to match', 'You would think this appears to match').to eq false
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'sees tweets which overlap by at least n words as similar' do
|
94
|
+
expect(described_class.similar 'This is a string of words', 'Also this is a string of words innit').to eq true
|
95
|
+
expect(described_class.similar 'This is a string of words', 'Also this is a similar string similar words innit').to eq false
|
96
|
+
expect(described_class.similar 'This one will be a definite match ', 'So this one will be a definite match no doubt').to eq true
|
97
|
+
end
|
98
|
+
|
99
|
+
it 'deals sensibly with URLs and hashtags' do
|
100
|
+
expect(described_class.similar 'This one has a http://taylor.swift in it', 'So this one has a http://other.url/ in it here').to eq true
|
101
|
+
end
|
102
|
+
|
103
|
+
it 'works on real-world data' do
|
104
|
+
expect(described_class.
|
105
|
+
similar 'Netflix Releases Taylor Swift-Fetching Developer Preview: Netflix has released a developer preview of its in-house… bit.ly/1JfRdgA',
|
106
|
+
'Netflix Releases Taylor Swift-Fetching Developer Preview - Netflix has released a developer preview of its in-house d...'
|
107
|
+
).to eq true
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
87
111
|
it 'saves a set' do
|
88
112
|
set = Set.new [1, 2, 3]
|
89
113
|
described_class.save set
|