twitter_ebooks 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +78 -0
- data/NOTES.md +4 -0
- data/README.md +20 -0
- data/bin/ebooks +83 -0
- data/data/adjectives.txt +1466 -0
- data/data/nouns.txt +2193 -0
- data/data/stopwords.txt +639 -0
- data/lib/twitter_ebooks/archiver.rb +86 -0
- data/lib/twitter_ebooks/bot.rb +145 -0
- data/lib/twitter_ebooks/markov.rb +89 -0
- data/lib/twitter_ebooks/model.rb +147 -0
- data/lib/twitter_ebooks/nlp.rb +142 -0
- data/lib/twitter_ebooks/version.rb +3 -0
- data/lib/twitter_ebooks.rb +20 -0
- data/skeleton/Procfile +1 -0
- data/skeleton/bots.rb +47 -0
- data/skeleton/corpus/README.md +1 -0
- data/skeleton/model/README.md +1 -0
- data/test/corpus/0xabad1dea.tweets +14696 -0
- data/test/tokenize.rb +18 -0
- data/twitter_ebooks.gemspec +30 -0
- metadata +247 -0
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
.*.swp
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
twitter_ebooks (2.0.0)
|
5
|
+
engtagger
|
6
|
+
gingerice
|
7
|
+
htmlentities
|
8
|
+
linguistics
|
9
|
+
ruby-stemmer
|
10
|
+
rufus-scheduler
|
11
|
+
tactful_tokenizer
|
12
|
+
tokenizer
|
13
|
+
tweetstream
|
14
|
+
twitter
|
15
|
+
|
16
|
+
GEM
|
17
|
+
remote: https://rubygems.org/
|
18
|
+
specs:
|
19
|
+
addressable (2.3.5)
|
20
|
+
atomic (1.1.14)
|
21
|
+
awesome_print (1.2.0)
|
22
|
+
cookiejar (0.3.0)
|
23
|
+
daemons (1.1.9)
|
24
|
+
em-http-request (1.0.3)
|
25
|
+
addressable (>= 2.2.3)
|
26
|
+
cookiejar
|
27
|
+
em-socksify
|
28
|
+
eventmachine (>= 1.0.0.beta.4)
|
29
|
+
http_parser.rb (>= 0.5.3)
|
30
|
+
em-socksify (0.3.0)
|
31
|
+
eventmachine (>= 1.0.0.beta.4)
|
32
|
+
em-twitter (0.2.2)
|
33
|
+
eventmachine (~> 1.0)
|
34
|
+
http_parser.rb (~> 0.5)
|
35
|
+
simple_oauth (~> 0.1)
|
36
|
+
engtagger (0.1.2)
|
37
|
+
eventmachine (1.0.3)
|
38
|
+
faraday (0.8.8)
|
39
|
+
multipart-post (~> 1.2.0)
|
40
|
+
gingerice (1.2.1)
|
41
|
+
addressable
|
42
|
+
awesome_print
|
43
|
+
htmlentities (4.3.1)
|
44
|
+
http_parser.rb (0.5.3)
|
45
|
+
linguistics (2.0.2)
|
46
|
+
loggability (~> 0.5)
|
47
|
+
loggability (0.8.1)
|
48
|
+
minitest (5.0.8)
|
49
|
+
multi_json (1.8.2)
|
50
|
+
multipart-post (1.2.0)
|
51
|
+
ruby-stemmer (0.9.3)
|
52
|
+
rufus-scheduler (3.0.2)
|
53
|
+
tzinfo
|
54
|
+
simple_oauth (0.2.0)
|
55
|
+
tactful_tokenizer (0.0.2)
|
56
|
+
thread_safe (0.1.3)
|
57
|
+
atomic
|
58
|
+
tokenizer (0.1.1)
|
59
|
+
tweetstream (2.5.0)
|
60
|
+
daemons (~> 1.1)
|
61
|
+
em-http-request (~> 1.0.2)
|
62
|
+
em-twitter (~> 0.2)
|
63
|
+
twitter (~> 4.5)
|
64
|
+
yajl-ruby (~> 1.1)
|
65
|
+
twitter (4.8.1)
|
66
|
+
faraday (~> 0.8, < 0.10)
|
67
|
+
multi_json (~> 1.0)
|
68
|
+
simple_oauth (~> 0.2)
|
69
|
+
tzinfo (1.1.0)
|
70
|
+
thread_safe (~> 0.1)
|
71
|
+
yajl-ruby (1.1.0)
|
72
|
+
|
73
|
+
PLATFORMS
|
74
|
+
ruby
|
75
|
+
|
76
|
+
DEPENDENCIES
|
77
|
+
minitest
|
78
|
+
twitter_ebooks!
|
data/NOTES.md
ADDED
data/README.md
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# twitter\_ebooks 2.0.0
|
2
|
+
|
3
|
+
Complete rewrite of twitter\_ebooks. Allows context-sensitive responsive bots via the Twitter streaming API, along with higher-quality tokenization and ngram modeling.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
```bash
|
8
|
+
gem install twitter_ebooks
|
9
|
+
```
|
10
|
+
|
11
|
+
## Making a bot
|
12
|
+
|
13
|
+
twitter\_ebooks uses a Rails-like skeleton app generator. Let's say we want to make a revolutionary Marxist bot based on the writings of Leon Trotsky (who doesn't?):
|
14
|
+
|
15
|
+
```bash
|
16
|
+
ebooks new trotsky_ebooks
|
17
|
+
cd trotsky_ebooks
|
18
|
+
```
|
19
|
+
|
20
|
+
|
data/bin/ebooks
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'fileutils'
require 'twitter_ebooks'
|
4
|
+
|
5
|
+
module Ebooks
  # Root of the bot application; currently just the directory the command is
  # run from.
  # XXX: walk up the parent directories to locate the app root instead.
  APP_PATH = Dir.pwd

  # Minimum number of arguments each subcommand needs after its own name.
  # "new" is 0 because Ebooks.new prints its own usage when the name is missing.
  COMMAND_MIN_ARGS = {
    "new" => 0,
    "consume" => 1,
    "gen" => 1,
    "archive" => 2
  }.freeze

  # Creates a new bot application by copying the skeleton directory to
  # ./<target> and filling the bot name into the generated bots.rb.
  #
  # Logs a message and exits when no name is given or when the destination
  # already exists.
  #
  # @param target [String, nil] name of the new bot repository
  def self.new(target)
    usage = "Usage: ebooks new <reponame>"

    if target.nil?
      log usage
      exit
    end

    # The argument is the repository name; the app is created next to the
    # current working directory.
    reponame = target
    target = "./#{reponame}"

    if File.exist?(target)
      log "#{target} already exists. Please remove if you want to recreate."
      exit
    end

    FileUtils.cp_r(SKELETON_PATH, target)

    # Overwrite the copied bots.rb with the template's placeholder substituted.
    template = File.read(File.join(SKELETON_PATH, 'bots.rb'))
    File.open(File.join(target, 'bots.rb'), 'w') do |f|
      f.write(template.gsub("{{BOT_NAME}}", reponame))
    end

    log "New twitter_ebooks app created at #{target}"
  end

  # Builds a model from the text corpus at +path+ and saves it as
  # model/<basename without extension>.model under APP_PATH.
  #
  # @param path [String] path to the corpus file
  def self.consume(path)
    filename = File.basename(path)
    # Strip only the last extension; a name without an extension is kept whole.
    shortname = File.basename(path, '.*')

    log "Consuming text corpus: #{filename}"
    outpath = File.join(APP_PATH, 'model', "#{shortname}.model")
    Model.consume(path).save(outpath)
    log "Corpus consumed"
  end

  # Loads the model at +model_path+, prints how long loading took, then prints
  # either a response to +input+ or, when +input+ is nil or empty, a
  # free-standing statement.
  #
  # @param model_path [String] path to a saved model
  # @param input [String, nil] optional text to respond to
  def self.gen(model_path, input)
    require 'benchmark' # loaded lazily; only this subcommand needs it

    model = nil
    puts Benchmark.measure { model = Model.load(model_path) }

    if input && !input.empty?
      puts "@cmd " + model.markov_response(input, 135)
    else
      puts model.markov_statement
    end
  end

  # Fetches the tweets of +username+ into +outpath+ via Archiver.
  #
  # @param username [String] account to archive
  # @param outpath [String] file the archive is written to
  def self.archive(username, outpath)
    Archiver.new(username, outpath).fetch_tweets
  end

  # Dispatches a command line to the matching subcommand.
  #
  # Logs the usage text and exits when no subcommand is given, the subcommand
  # is unknown, or it is missing required arguments.
  #
  # @param args [Array<String>] command-line arguments, subcommand first
  def self.command(args)
    usage = [
      "Usage:",
      "ebooks new <reponame>",
      "ebooks consume <corpus_path>",
      "ebooks gen <model> [input]",
      "ebooks archive <@user> <outpath>"
    ].join("\n") + "\n"

    name, *rest = args
    min_args = COMMAND_MIN_ARGS[name]

    if min_args.nil? || rest.length < min_args
      log usage
      exit
    end

    case name
    when "new" then new(rest[0])
    when "consume" then consume(rest[0])
    # Everything after the model path is joined into a single input string.
    when "gen" then gen(rest[0], rest.drop(1).join(' '))
    when "archive" then archive(rest[0], rest[1])
    end
  end
end
|
82
|
+
|
83
|
+
# Script entry point: dispatch on the arguments given on the command line.
Ebooks.command(ARGV)
|