twitter_ebooks 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1 @@
1
+ .*.swp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in twitter_ebooks.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,78 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ twitter_ebooks (2.0.0)
5
+ engtagger
6
+ gingerice
7
+ htmlentities
8
+ linguistics
9
+ ruby-stemmer
10
+ rufus-scheduler
11
+ tactful_tokenizer
12
+ tokenizer
13
+ tweetstream
14
+ twitter
15
+
16
+ GEM
17
+ remote: https://rubygems.org/
18
+ specs:
19
+ addressable (2.3.5)
20
+ atomic (1.1.14)
21
+ awesome_print (1.2.0)
22
+ cookiejar (0.3.0)
23
+ daemons (1.1.9)
24
+ em-http-request (1.0.3)
25
+ addressable (>= 2.2.3)
26
+ cookiejar
27
+ em-socksify
28
+ eventmachine (>= 1.0.0.beta.4)
29
+ http_parser.rb (>= 0.5.3)
30
+ em-socksify (0.3.0)
31
+ eventmachine (>= 1.0.0.beta.4)
32
+ em-twitter (0.2.2)
33
+ eventmachine (~> 1.0)
34
+ http_parser.rb (~> 0.5)
35
+ simple_oauth (~> 0.1)
36
+ engtagger (0.1.2)
37
+ eventmachine (1.0.3)
38
+ faraday (0.8.8)
39
+ multipart-post (~> 1.2.0)
40
+ gingerice (1.2.1)
41
+ addressable
42
+ awesome_print
43
+ htmlentities (4.3.1)
44
+ http_parser.rb (0.5.3)
45
+ linguistics (2.0.2)
46
+ loggability (~> 0.5)
47
+ loggability (0.8.1)
48
+ minitest (5.0.8)
49
+ multi_json (1.8.2)
50
+ multipart-post (1.2.0)
51
+ ruby-stemmer (0.9.3)
52
+ rufus-scheduler (3.0.2)
53
+ tzinfo
54
+ simple_oauth (0.2.0)
55
+ tactful_tokenizer (0.0.2)
56
+ thread_safe (0.1.3)
57
+ atomic
58
+ tokenizer (0.1.1)
59
+ tweetstream (2.5.0)
60
+ daemons (~> 1.1)
61
+ em-http-request (~> 1.0.2)
62
+ em-twitter (~> 0.2)
63
+ twitter (~> 4.5)
64
+ yajl-ruby (~> 1.1)
65
+ twitter (4.8.1)
66
+ faraday (~> 0.8, < 0.10)
67
+ multi_json (~> 1.0)
68
+ simple_oauth (~> 0.2)
69
+ tzinfo (1.1.0)
70
+ thread_safe (~> 0.1)
71
+ yajl-ruby (1.1.0)
72
+
73
+ PLATFORMS
74
+ ruby
75
+
76
+ DEPENDENCIES
77
+ minitest
78
+ twitter_ebooks!
data/NOTES.md ADDED
@@ -0,0 +1,4 @@
1
+ - Files in text/ are preprocessed by `rake consume` and serialized
2
+ - e.g. text/foo.tweets becomes consumed/foo.corpus
3
+ - `rake consume` looks at hashes to know which it needs to update
4
+ - Preprocessed corpus files are loaded at runtime by Corpus.load('foo')
data/README.md ADDED
@@ -0,0 +1,20 @@
1
+ # twitter\_ebooks 2.0.0
2
+
3
+ Complete rewrite of twitter\_ebooks. Allows context-sensitive responsive bots via the Twitter streaming API, along with higher-quality tokenization and ngram modeling.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ gem install twitter_ebooks
9
+ ```
10
+
11
+ ## Making a bot
12
+
13
+ twitter\_ebooks uses a Rails-like skeleton app generator. Let's say we want to make a revolutionary Marxist bot based on the writings of Leon Trotsky (who doesn't?):
14
+
15
+ ```bash
16
+ ebooks new trotsky_ebooks
17
+ cd trotsky_ebooks
18
+ ```
19
+
20
+
data/bin/ebooks ADDED
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'twitter_ebooks'
4
+
5
# Command-line front end for twitter_ebooks.
#
# Each singleton method below implements one `ebooks` subcommand, and
# `Ebooks.command` dispatches to them from the raw argument list. The methods
# rely on names that this script does not define itself (`log`,
# `SKELETON_PATH`, `Model`, `Archiver`, `FileUtils`); they are expected to be
# provided by `require 'twitter_ebooks'`.
module Ebooks
  APP_PATH = Dir.pwd # XXX do some recursive thing instead

  # Create a new bot project by copying the skeleton app into ./<target>
  # (relative to the current working directory) and filling the bot name
  # into the copied bots.rb template.
  #
  # target - project/bot name given on the command line.
  #
  # Logs the usage line and exits when no name is given, and refuses to
  # overwrite an existing path.
  def self.new(target)
    usage = "Usage: ebooks new <reponame>"

    if target.nil? || target.empty?
      log usage
      exit
    end

    # The bot name and the directory to create are distinct values; the
    # template substitution must use the bare name, not the "./"-prefixed path.
    reponame = target
    path = "./#{reponame}"

    if File.exist?(path)
      log "#{path} already exists. Please remove if you want to recreate."
      exit
    end

    FileUtils.cp_r(SKELETON_PATH, path)

    # Overwrite the copied bots.rb with the template's placeholder filled in.
    template = File.read(File.join(SKELETON_PATH, 'bots.rb'))
    File.open(File.join(path, 'bots.rb'), 'w') do |f|
      f.write(template.gsub("{{BOT_NAME}}", reponame))
    end

    log "New twitter_ebooks app created at #{path}"
  end

  # Build a model from a text corpus and save it under APP_PATH/model.
  #
  # path - path to the corpus file; the output is named after the file with
  #        its last extension removed (text/foo.tweets -> model/foo.model).
  def self.consume(path)
    if path.nil?
      log "Usage: ebooks consume <corpus_path>"
      exit
    end

    filename = File.basename(path)
    # File.basename with '.*' drops only the final extension and, unlike
    # splitting on '.', keeps the whole name when there is no extension.
    shortname = File.basename(filename, '.*')

    log "Consuming text corpus: #{filename}"
    outpath = File.join(APP_PATH, 'model', "#{shortname}.model")
    Model.consume(path).save(outpath)
    log "Corpus consumed"
  end

  # Load a saved model, print how long loading took, then print one generated
  # line of text.
  #
  # model_path - path to a saved model file.
  # input      - optional prompt; when non-empty a response to it is printed
  #              (prefixed with "@cmd "), otherwise a free-standing statement.
  def self.gen(model_path, input)
    if model_path.nil?
      log "Usage: ebooks gen <model> [input]"
      exit
    end

    require 'benchmark' # only needed by this subcommand

    model = nil
    puts Benchmark.measure { model = Model.load(model_path) }

    if input && !input.empty?
      puts "@cmd " + model.markov_response(input, 135)
    else
      puts model.markov_statement
    end
  end

  # Fetch tweets for a user via Archiver.
  #
  # username - account to archive.
  # outpath  - passed through to Archiver.new unchanged.
  def self.archive(username, outpath)
    Archiver.new(username, outpath).fetch_tweets
  end

  # Dispatch a command line to the matching subcommand.
  #
  # args - array of argument strings (normally ARGV); args[0] selects the
  #        subcommand. With no arguments or an unrecognised subcommand the
  #        usage text is logged and the process exits.
  def self.command(args)
    usage = "Usage:\n" \
            "ebooks new <reponame>\n" \
            "ebooks consume <corpus_path>\n" \
            "ebooks gen <model> [input]\n" \
            "ebooks archive <@user> <outpath>\n"

    if args.empty?
      log usage
      exit
    end

    case args[0]
    when "new" then new(args[1])
    when "consume" then consume(args[1])
    # args[2..-1] is nil when only the subcommand was given; Array() turns
    # that into [] so the join cannot fail.
    when "gen" then gen(args[1], Array(args[2..-1]).join(' '))
    when "archive" then archive(args[1], args[2])
    else
      log usage
      exit
    end
  end
end
82
+
83
+ Ebooks.command(ARGV)