twitter_ebooks 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +78 -0
- data/NOTES.md +4 -0
- data/README.md +20 -0
- data/bin/ebooks +83 -0
- data/data/adjectives.txt +1466 -0
- data/data/nouns.txt +2193 -0
- data/data/stopwords.txt +639 -0
- data/lib/twitter_ebooks/archiver.rb +86 -0
- data/lib/twitter_ebooks/bot.rb +145 -0
- data/lib/twitter_ebooks/markov.rb +89 -0
- data/lib/twitter_ebooks/model.rb +147 -0
- data/lib/twitter_ebooks/nlp.rb +142 -0
- data/lib/twitter_ebooks/version.rb +3 -0
- data/lib/twitter_ebooks.rb +20 -0
- data/skeleton/Procfile +1 -0
- data/skeleton/bots.rb +47 -0
- data/skeleton/corpus/README.md +1 -0
- data/skeleton/model/README.md +1 -0
- data/test/corpus/0xabad1dea.tweets +14696 -0
- data/test/tokenize.rb +18 -0
- data/twitter_ebooks.gemspec +30 -0
- metadata +247 -0
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
.*.swp
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
twitter_ebooks (2.0.0)
|
5
|
+
engtagger
|
6
|
+
gingerice
|
7
|
+
htmlentities
|
8
|
+
linguistics
|
9
|
+
ruby-stemmer
|
10
|
+
rufus-scheduler
|
11
|
+
tactful_tokenizer
|
12
|
+
tokenizer
|
13
|
+
tweetstream
|
14
|
+
twitter
|
15
|
+
|
16
|
+
GEM
|
17
|
+
remote: https://rubygems.org/
|
18
|
+
specs:
|
19
|
+
addressable (2.3.5)
|
20
|
+
atomic (1.1.14)
|
21
|
+
awesome_print (1.2.0)
|
22
|
+
cookiejar (0.3.0)
|
23
|
+
daemons (1.1.9)
|
24
|
+
em-http-request (1.0.3)
|
25
|
+
addressable (>= 2.2.3)
|
26
|
+
cookiejar
|
27
|
+
em-socksify
|
28
|
+
eventmachine (>= 1.0.0.beta.4)
|
29
|
+
http_parser.rb (>= 0.5.3)
|
30
|
+
em-socksify (0.3.0)
|
31
|
+
eventmachine (>= 1.0.0.beta.4)
|
32
|
+
em-twitter (0.2.2)
|
33
|
+
eventmachine (~> 1.0)
|
34
|
+
http_parser.rb (~> 0.5)
|
35
|
+
simple_oauth (~> 0.1)
|
36
|
+
engtagger (0.1.2)
|
37
|
+
eventmachine (1.0.3)
|
38
|
+
faraday (0.8.8)
|
39
|
+
multipart-post (~> 1.2.0)
|
40
|
+
gingerice (1.2.1)
|
41
|
+
addressable
|
42
|
+
awesome_print
|
43
|
+
htmlentities (4.3.1)
|
44
|
+
http_parser.rb (0.5.3)
|
45
|
+
linguistics (2.0.2)
|
46
|
+
loggability (~> 0.5)
|
47
|
+
loggability (0.8.1)
|
48
|
+
minitest (5.0.8)
|
49
|
+
multi_json (1.8.2)
|
50
|
+
multipart-post (1.2.0)
|
51
|
+
ruby-stemmer (0.9.3)
|
52
|
+
rufus-scheduler (3.0.2)
|
53
|
+
tzinfo
|
54
|
+
simple_oauth (0.2.0)
|
55
|
+
tactful_tokenizer (0.0.2)
|
56
|
+
thread_safe (0.1.3)
|
57
|
+
atomic
|
58
|
+
tokenizer (0.1.1)
|
59
|
+
tweetstream (2.5.0)
|
60
|
+
daemons (~> 1.1)
|
61
|
+
em-http-request (~> 1.0.2)
|
62
|
+
em-twitter (~> 0.2)
|
63
|
+
twitter (~> 4.5)
|
64
|
+
yajl-ruby (~> 1.1)
|
65
|
+
twitter (4.8.1)
|
66
|
+
faraday (~> 0.8, < 0.10)
|
67
|
+
multi_json (~> 1.0)
|
68
|
+
simple_oauth (~> 0.2)
|
69
|
+
tzinfo (1.1.0)
|
70
|
+
thread_safe (~> 0.1)
|
71
|
+
yajl-ruby (1.1.0)
|
72
|
+
|
73
|
+
PLATFORMS
|
74
|
+
ruby
|
75
|
+
|
76
|
+
DEPENDENCIES
|
77
|
+
minitest
|
78
|
+
twitter_ebooks!
|
data/NOTES.md
ADDED
data/README.md
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# twitter\_ebooks 2.0.0
|
2
|
+
|
3
|
+
A complete rewrite of twitter\_ebooks. It allows context-sensitive, responsive bots via the Twitter streaming API, along with higher-quality tokenization and n-gram modeling.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
```bash
|
8
|
+
gem install twitter_ebooks
|
9
|
+
```
|
10
|
+
|
11
|
+
## Making a bot
|
12
|
+
|
13
|
+
twitter\_ebooks uses a Rails-like skeleton app generator. Let's say we want to make a revolutionary Marxist bot based on the writings of Leon Trotsky (who doesn't?):
|
14
|
+
|
15
|
+
```bash
|
16
|
+
ebooks new trotsky_ebooks
|
17
|
+
cd trotsky_ebooks
|
18
|
+
```
|
19
|
+
|
20
|
+
|
data/bin/ebooks
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'twitter_ebooks'
|
4
|
+
|
5
|
+
# Command-line front end for twitter_ebooks. Each subcommand of the `ebooks`
# executable maps to one module-level method here; `command` dispatches.
#
# Relies on names defined outside this block: `log`, `SKELETON_PATH`,
# `Model` and `Archiver`.
module Ebooks
  # Directory the executable was started from; generated models are written
  # beneath its `model` subdirectory.
  APP_PATH = Dir.pwd # XXX do some recursive thing instead

  # Creates a new bot app by copying the skeleton directory and filling in
  # the bot name in its bots.rb template.
  #
  # @param target [String, nil] name of the app; the app is created at
  #   "./<target>" and the name replaces "{{BOT_NAME}}" in bots.rb
  # @return [void] logs and exits the process when target is nil or the
  #   destination already exists
  def self.new(target)
    if target.nil?
      log "Usage: ebooks new <reponame>"
      exit
    end

    # The bare name is needed for the template substitution, the relative
    # path for the filesystem work; keep them as two separate locals.
    reponame = target
    path = "./#{reponame}"

    if File.exist?(path)
      log "#{path} already exists. Please remove if you want to recreate."
      exit
    end

    FileUtils.cp_r(SKELETON_PATH, path)

    # Overwrite the copied bots.rb with the name-substituted version.
    template = File.read(File.join(SKELETON_PATH, 'bots.rb'))
    File.write(File.join(path, 'bots.rb'), template.gsub("{{BOT_NAME}}", reponame))

    log "New twitter_ebooks app created at #{path}"
  end

  # Builds a model from a text corpus and saves it under APP_PATH/model.
  #
  # @param path [String] path to the corpus file
  # @return [void]
  def self.consume(path)
    filename = File.basename(path)
    # Strip only the last extension; a corpus without any extension keeps
    # its full name instead of collapsing to an empty model name.
    shortname = File.basename(filename, '.*')

    log "Consuming text corpus: #{filename}"
    outpath = File.join(APP_PATH, 'model', "#{shortname}.model")
    Model.consume(path).save(outpath)
    log "Corpus consumed"
  end

  # Loads a model, prints how long loading took, then prints one generated
  # line: a response to `input` when given, otherwise a free statement.
  #
  # @param model_path [String] path to a saved model
  # @param input [String, nil] optional text to respond to
  # @return [void]
  def self.gen(model_path, input)
    require 'benchmark' # loaded lazily; only this subcommand needs it
    model = nil
    puts Benchmark.measure { model = Model.load(model_path) }

    if input && !input.empty?
      puts "@cmd " + model.markov_response(input, 135)
    else
      puts model.markov_statement
    end
  end

  # Fetches tweets for a user into an archive file via Archiver.
  #
  # @param username [String] account to archive
  # @param outpath [String] destination path handed to Archiver
  # @return [void]
  def self.archive(username, outpath)
    Archiver.new(username, outpath).fetch_tweets
  end

  # Dispatches a command line to the matching subcommand.
  #
  # @param args [Array<String>] arguments, subcommand name first
  # @return [void] logs the usage text and exits when no subcommand or an
  #   unknown subcommand is given
  def self.command(args)
    usage = <<~USAGE
      Usage:
      ebooks new <reponame>
      ebooks consume <corpus_path>
      ebooks gen <model> [input]
      ebooks archive <@user> <outpath>
    USAGE

    if args.empty?
      log usage
      exit
    end

    case args[0]
    when "new" then new(args[1])
    when "consume" then consume(args[1])
    # drop(2) yields [] when fewer than three arguments were given, whereas
    # args[2..-1] is nil for a lone "gen" and would crash on join.
    when "gen" then gen(args[1], args.drop(2).join(' '))
    when "archive" then archive(args[1], args[2])
    else
      log usage
      exit
    end
  end
end
|
82
|
+
|
83
|
+
# Script entry point: hand the command-line arguments to the dispatcher.
Ebooks.command(ARGV)
|