twitter_ebooks 2.1.2 → 2.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
File without changes
data/Gemfile CHANGED
File without changes
data/Gemfile.lock CHANGED
File without changes
data/LICENSE CHANGED
File without changes
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # twitter\_ebooks 2.1.2
1
+ # twitter\_ebooks 2.1.3
2
2
 
3
3
  Complete rewrite of twitter\_ebooks. Allows context-sensitive responsive bots via the Twitter streaming API, along with higher-quality ngram modeling. Still needs a bit of cleaning and documenting.
4
4
 
data/Rakefile CHANGED
File without changes
data/bin/ebooks CHANGED
@@ -60,7 +60,7 @@ module Ebooks
60
60
  end
61
61
 
62
62
  def self.archive(username, outpath)
63
- Archiver.new(username, outpath).fetch_tweets
63
+ Archive.new(username, outpath).sync
64
64
  end
65
65
 
66
66
  def self.tweet(modelpath, username)
@@ -73,6 +73,31 @@ module Ebooks
73
73
  bot.tweet(statement)
74
74
  end
75
75
 
76
+ def self.jsonify(old_path, new_path)
77
+ name = File.basename(old_path).split('.')[0]
78
+ new_path ||= name + ".json"
79
+
80
+ tweets = []
81
+ id = nil
82
+ File.read(old_path).split("\n").each do |l|
83
+ if l.start_with?('# ')
84
+ id = l.split('# ')[-1]
85
+ else
86
+ tweet = { text: l }
87
+ if id
88
+ tweet[:id] = id
89
+ id = nil
90
+ end
91
+ tweets << tweet
92
+ end
93
+ end
94
+
95
+ File.open(new_path, 'w') do |f|
96
+ log "Writing #{tweets.length} tweets to #{new_path}"
97
+ f.write(JSON.pretty_generate(tweets))
98
+ end
99
+ end
100
+
76
101
  def self.command(args)
77
102
  usage = """Usage:
78
103
  ebooks new <reponame>
@@ -81,6 +106,7 @@ module Ebooks
81
106
  ebooks score <model_path> <input>
82
107
  ebooks archive <@user> <outpath>
83
108
  ebooks tweet <model_path> <@bot>
109
+ ebooks jsonify <old_corpus_path> [new_corpus_path]
84
110
  """
85
111
 
86
112
  if args.length == 0
@@ -95,6 +121,7 @@ module Ebooks
95
121
  when "score" then score(args[1], args[2..-1].join(' '))
96
122
  when "archive" then archive(args[1], args[2])
97
123
  when "tweet" then tweet(args[1], args[2])
124
+ when "jsonify" then jsonify(args[1], args[2])
98
125
  end
99
126
  end
100
127
  end
data/data/adjectives.txt CHANGED
File without changes
data/data/nouns.txt CHANGED
File without changes
data/data/stopwords.txt CHANGED
File without changes
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'twitter'
5
+ require 'json'
6
+
7
+ CONFIG_PATH = "/home/#{ENV['USER']}/.ebooksrc"
8
+
9
+ module Ebooks
10
+ class Archive
11
+ attr_reader :tweets
12
+
13
+ def make_client
14
+ if File.exists?(CONFIG_PATH)
15
+ @config = JSON.parse(File.read(CONFIG_PATH), symbolize_names: true)
16
+ else
17
+ @config = {}
18
+
19
+ puts "As Twitter no longer allows anonymous API access, you'll need to enter the auth details of any account to use for archiving. These will be stored in #{CONFIG_PATH} if you need to change them later."
20
+ print "Consumer key: "
21
+ @config[:consumer_key] = STDIN.gets.chomp
22
+ print "Consumer secret: "
23
+ @config[:consumer_secret] = STDIN.gets.chomp
24
+ print "Oauth token: "
25
+ @config[:oauth_token] = STDIN.gets.chomp
26
+ print "Oauth secret: "
27
+ @config[:oauth_token_secret] = STDIN.gets.chomp
28
+
29
+ File.open(CONFIG_PATH, 'w') do |f|
30
+ f.write(JSON.pretty_generate(@config))
31
+ end
32
+ end
33
+
34
+ Twitter.configure do |config|
35
+ config.consumer_key = @config[:consumer_key]
36
+ config.consumer_secret = @config[:consumer_secret]
37
+ config.oauth_token = @config[:oauth_token]
38
+ config.oauth_token_secret = @config[:oauth_token_secret]
39
+ end
40
+
41
+ Twitter::Client.new
42
+ end
43
+
44
+ def initialize(username, path, client=nil)
45
+ @username = username
46
+ @path = path || "#{username}.json"
47
+ @client = client || make_client
48
+
49
+ if File.exists?(@path)
50
+ @tweets = JSON.parse(File.read(@path), symbolize_names: true)
51
+ log "Currently #{@tweets.length} tweets for #{@username}"
52
+ else
53
+ @tweets.nil?
54
+ log "New archive for @#{username} at #{@path}"
55
+ end
56
+ end
57
+
58
+ def sync
59
+ retries = 0
60
+ tweets = []
61
+ max_id = nil
62
+
63
+ opts = {
64
+ count: 200,
65
+ #include_rts: false,
66
+ trim_user: true
67
+ }
68
+
69
+ opts[:since_id] = @tweets[0][:id] unless @tweets.nil?
70
+
71
+ loop do
72
+ opts[:max_id] = max_id unless max_id.nil?
73
+ new = @client.user_timeline(@username, opts)
74
+ break if new.length <= 1
75
+ tweets += new
76
+ puts "Received #{tweets.length} new tweets"
77
+ max_id = new.last.id
78
+ end
79
+
80
+ if tweets.length == 0
81
+ log "No new tweets"
82
+ else
83
+ @tweets ||= []
84
+ @tweets = tweets.map(&:attrs).each { |tw|
85
+ tw.delete(:entities)
86
+ } + @tweets
87
+ File.open(@path, 'w') do |f|
88
+ f.write(JSON.pretty_generate(@tweets))
89
+ end
90
+ end
91
+ end
92
+ end
93
+ end
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -1,3 +1,3 @@
1
1
  module Ebooks
2
- VERSION = "2.1.2"
2
+ VERSION = "2.1.3"
3
3
  end
@@ -16,7 +16,7 @@ module Ebooks
16
16
  end
17
17
 
18
18
  require 'twitter_ebooks/nlp'
19
- require 'twitter_ebooks/archiver'
19
+ require 'twitter_ebooks/archive'
20
20
  require 'twitter_ebooks/markov'
21
21
  require 'twitter_ebooks/suffix'
22
22
  require 'twitter_ebooks/model'
File without changes
data/skeleton/.gitignore CHANGED
File without changes
data/skeleton/Procfile CHANGED
File without changes
data/skeleton/bots.rb CHANGED
File without changes
data/skeleton/run.rb CHANGED
File without changes
File without changes
data/test/keywords.rb CHANGED
File without changes
data/test/tokenize.rb CHANGED
File without changes
File without changes
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter_ebooks
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.2
4
+ version: 2.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-11-21 00:00:00.000000000 Z
12
+ date: 2013-11-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: minitest
@@ -167,7 +167,6 @@ files:
167
167
  - Gemfile
168
168
  - Gemfile.lock
169
169
  - LICENSE
170
- - NOTES.md
171
170
  - README.md
172
171
  - Rakefile
173
172
  - bin/ebooks
@@ -175,7 +174,7 @@ files:
175
174
  - data/nouns.txt
176
175
  - data/stopwords.txt
177
176
  - lib/twitter_ebooks.rb
178
- - lib/twitter_ebooks/archiver.rb
177
+ - lib/twitter_ebooks/archive.rb
179
178
  - lib/twitter_ebooks/bot.rb
180
179
  - lib/twitter_ebooks/markov.rb
181
180
  - lib/twitter_ebooks/model.rb
data/NOTES.md DELETED
@@ -1,4 +0,0 @@
1
- - Files in text/ are preprocessed by `rake consume` and serialized
2
- - e.g. text/foo.tweets becomes consumed/foo.corpus
3
- - `rake consume` looks at hashes to know which it needs to update
4
- - Preprocessed corpus files are loaded at runtime by Corpus.load('foo')
@@ -1,82 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # encoding: utf-8
3
-
4
- require 'twitter'
5
-
6
- module Ebooks
7
- class Archiver
8
- def initialize(username, outpath)
9
- @username = username
10
- @outpath = outpath
11
- @client = Twitter::Client.new
12
- end
13
-
14
- # Read existing corpus into memory.
15
- # Return list of tweet lines and the last tweet id.
16
- def read_corpus
17
- lines = []
18
- since_id = nil
19
-
20
- if File.exists?(@outpath)
21
- lines = File.read(@outpath).split("\n")
22
- if lines[0].start_with?('#')
23
- since_id = lines[0].split('# ').last
24
- end
25
- end
26
-
27
- [lines, since_id]
28
- end
29
-
30
- # Retrieve all available tweets for a given user since the last tweet id
31
- def tweets_since(since_id)
32
- page = 1
33
- retries = 0
34
- tweets = []
35
- max_id = nil
36
-
37
- opts = {
38
- count: 200,
39
- include_rts: false,
40
- trim_user: true
41
- }
42
-
43
- opts[:since_id] = since_id unless since_id.nil?
44
-
45
- loop do
46
- opts[:max_id] = max_id unless max_id.nil?
47
- new = @client.user_timeline(@username, opts)
48
- break if new.length <= 1
49
- puts "Received #{new.length} tweets"
50
- tweets += new
51
- max_id = new.last.id
52
- break
53
- end
54
-
55
- tweets
56
- end
57
-
58
- def fetch_tweets
59
- lines, since_id = read_corpus
60
-
61
- if since_id.nil?
62
- puts "Retrieving tweets from @#{@username}"
63
- else
64
- puts "Retrieving tweets from @#{@username} since #{since_id}"
65
- end
66
-
67
- tweets = tweets_since(since_id)
68
-
69
- if tweets.length == 0
70
- puts "No new tweets"
71
- return
72
- end
73
-
74
- new_lines = tweets.map { |tweet| tweet.text.gsub("\n", " ") }
75
- new_since_id = tweets[0].id.to_s
76
- lines = ["# " + new_since_id] + new_lines + lines
77
- corpus = File.open(@outpath, 'w')
78
- corpus.write(lines.join("\n"))
79
- corpus.close
80
- end
81
- end
82
- end