twitter_ebooks 2.1.2 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
File without changes
data/Gemfile CHANGED
File without changes
data/Gemfile.lock CHANGED
File without changes
data/LICENSE CHANGED
File without changes
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # twitter\_ebooks 2.1.2
1
+ # twitter\_ebooks 2.1.3
2
2
 
3
3
  Complete rewrite of twitter\_ebooks. Allows context-sensitive responsive bots via the Twitter streaming API, along with higher-quality ngram modeling. Still needs a bit of cleaning and documenting.
4
4
 
data/Rakefile CHANGED
File without changes
data/bin/ebooks CHANGED
@@ -60,7 +60,7 @@ module Ebooks
60
60
  end
61
61
 
62
62
  def self.archive(username, outpath)
63
- Archiver.new(username, outpath).fetch_tweets
63
+ Archive.new(username, outpath).sync
64
64
  end
65
65
 
66
66
  def self.tweet(modelpath, username)
@@ -73,6 +73,31 @@ module Ebooks
73
73
  bot.tweet(statement)
74
74
  end
75
75
 
76
# Converts a legacy plain-text corpus into a JSON array of tweet hashes.
#
# Every non-blank line of the old corpus becomes a hash with a :text key.
# A line starting with "# " is read as an id marker instead of a tweet: the
# text after its last "# " is attached as :id to the next tweet line and is
# then cleared, so each marker applies to a single tweet.
#
# @param old_path [String] path of the plain-text corpus to read
# @param new_path [String, nil] destination path; when nil it defaults to
#   the part of old_path's basename before its first dot, plus ".json"
def self.jsonify(old_path, new_path)
  new_path ||= File.basename(old_path).split('.')[0] + ".json"

  tweets = []
  pending_id = nil

  File.readlines(old_path).each do |raw|
    # chomp (rather than splitting on "\n") also drops the "\r" of CRLF files.
    line = raw.chomp
    # A blank line carries no tweet text; skipping it keeps empty entries
    # out of the generated archive.
    next if line.strip.empty?

    if line.start_with?('# ')
      pending_id = line.split('# ').last
      next
    end

    tweet = { text: line }
    if pending_id
      tweet[:id] = pending_id
      pending_id = nil
    end
    tweets << tweet
  end

  log "Writing #{tweets.length} tweets to #{new_path}"
  File.open(new_path, 'w') do |f|
    f.write(JSON.pretty_generate(tweets))
  end
end
100
+
76
101
  def self.command(args)
77
102
  usage = """Usage:
78
103
  ebooks new <reponame>
@@ -81,6 +106,7 @@ module Ebooks
81
106
  ebooks score <model_path> <input>
82
107
  ebooks archive <@user> <outpath>
83
108
  ebooks tweet <model_path> <@bot>
109
+ ebooks jsonify <old_corpus_path> [new_corpus_path]
84
110
  """
85
111
 
86
112
  if args.length == 0
@@ -95,6 +121,7 @@ module Ebooks
95
121
  when "score" then score(args[1], args[2..-1].join(' '))
96
122
  when "archive" then archive(args[1], args[2])
97
123
  when "tweet" then tweet(args[1], args[2])
124
+ when "jsonify" then jsonify(args[1], args[2])
98
125
  end
99
126
  end
100
127
  end
data/data/adjectives.txt CHANGED
File without changes
data/data/nouns.txt CHANGED
File without changes
data/data/stopwords.txt CHANGED
File without changes
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'twitter'
5
+ require 'json'
6
+
7
# Path of the JSON file that stores the Twitter API credentials used for
# archiving. It is resolved against the current user's home directory
# instead of assuming that home directories live under /home/<USER>.
CONFIG_PATH = File.join(Dir.home, '.ebooksrc')
8
+
9
module Ebooks
  # A JSON file of a user's tweets that can be brought up to date from the
  # Twitter API with #sync.
  class Archive
    # @return [Array<Hash>, nil] the archived tweets, most recently fetched
    #   first; nil when no archive file existed and nothing has been synced
    attr_reader :tweets

    # Builds a Twitter client from the credentials stored at CONFIG_PATH.
    # When that file does not exist, the four credentials are read from
    # STDIN and saved there first.
    #
    # @return [Twitter::Client]
    def make_client
      if File.exist?(CONFIG_PATH)
        @config = JSON.parse(File.read(CONFIG_PATH), symbolize_names: true)
      else
        @config = {}

        puts "As Twitter no longer allows anonymous API access, you'll need to enter the auth details of any account to use for archiving. These will be stored in #{CONFIG_PATH} if you need to change them later."
        [
          [:consumer_key, "Consumer key: "],
          [:consumer_secret, "Consumer secret: "],
          [:oauth_token, "Oauth token: "],
          [:oauth_token_secret, "Oauth secret: "]
        ].each do |key, label|
          print label
          @config[key] = STDIN.gets.chomp
        end

        # The file holds API secrets, so create it readable and writable by
        # its owner only.
        File.open(CONFIG_PATH, 'w', 0600) do |f|
          f.write(JSON.pretty_generate(@config))
        end
      end

      Twitter.configure do |config|
        config.consumer_key = @config[:consumer_key]
        config.consumer_secret = @config[:consumer_secret]
        config.oauth_token = @config[:oauth_token]
        config.oauth_token_secret = @config[:oauth_token_secret]
      end

      Twitter::Client.new
    end

    # Loads the archive stored at path, if that file exists.
    #
    # @param username [String] account whose timeline is archived
    # @param path [String, nil] archive file; defaults to "<username>.json"
    # @param client [Object, nil] object responding to
    #   user_timeline(username, opts); built with #make_client when nil
    def initialize(username, path, client=nil)
      @username = username
      @path = path || "#{username}.json"
      @client = client || make_client

      if File.exist?(@path)
        @tweets = JSON.parse(File.read(@path), symbolize_names: true)
        log "Currently #{@tweets.length} tweets for #{@username}"
      else
        @tweets = nil
        log "New archive for @#{username} at #{@path}"
      end
    end

    # Fetches the tweets that are not in the archive yet, puts them in front
    # of the existing ones (minus their :entities data) and rewrites the
    # archive file. Logs "No new tweets" and leaves the file untouched when
    # nothing was fetched.
    def sync
      fetched = fetch_new_tweets

      if fetched.empty?
        log "No new tweets"
      else
        @tweets ||= []
        @tweets = fetched.map(&:attrs).each { |tw|
          tw.delete(:entities)
        } + @tweets
        File.open(@path, 'w') do |f|
          f.write(JSON.pretty_generate(@tweets))
        end
      end
    end

    private

    # Pages through the user's timeline and returns every tweet the client
    # hands back, in the order received.
    def fetch_new_tweets
      fetched = []
      max_id = nil

      opts = {
        count: 200,
        #include_rts: false,
        trim_user: true
      }

      # Only ask for tweets newer than the first archived one. An archive
      # file holding an empty array has no such tweet to refer to.
      opts[:since_id] = @tweets.first[:id] unless @tweets.nil? || @tweets.empty?

      loop do
        opts[:max_id] = max_id unless max_id.nil?
        page = @client.user_timeline(@username, opts)
        break if page.empty?
        fetched += page
        puts "Received #{fetched.length} new tweets"
        # max_id is inclusive, so step below the last id received; otherwise
        # that tweet would be returned again at the top of the next page.
        max_id = page.last.id - 1
      end

      fetched
    end
  end
end
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -1,3 +1,3 @@
1
1
  module Ebooks
2
- VERSION = "2.1.2"
2
+ VERSION = "2.1.3"
3
3
  end
@@ -16,7 +16,7 @@ module Ebooks
16
16
  end
17
17
 
18
18
  require 'twitter_ebooks/nlp'
19
- require 'twitter_ebooks/archiver'
19
+ require 'twitter_ebooks/archive'
20
20
  require 'twitter_ebooks/markov'
21
21
  require 'twitter_ebooks/suffix'
22
22
  require 'twitter_ebooks/model'
File without changes
data/skeleton/.gitignore CHANGED
File without changes
data/skeleton/Procfile CHANGED
File without changes
data/skeleton/bots.rb CHANGED
File without changes
data/skeleton/run.rb CHANGED
File without changes
File without changes
data/test/keywords.rb CHANGED
File without changes
data/test/tokenize.rb CHANGED
File without changes
File without changes
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter_ebooks
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.2
4
+ version: 2.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-11-21 00:00:00.000000000 Z
12
+ date: 2013-11-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: minitest
@@ -167,7 +167,6 @@ files:
167
167
  - Gemfile
168
168
  - Gemfile.lock
169
169
  - LICENSE
170
- - NOTES.md
171
170
  - README.md
172
171
  - Rakefile
173
172
  - bin/ebooks
@@ -175,7 +174,7 @@ files:
175
174
  - data/nouns.txt
176
175
  - data/stopwords.txt
177
176
  - lib/twitter_ebooks.rb
178
- - lib/twitter_ebooks/archiver.rb
177
+ - lib/twitter_ebooks/archive.rb
179
178
  - lib/twitter_ebooks/bot.rb
180
179
  - lib/twitter_ebooks/markov.rb
181
180
  - lib/twitter_ebooks/model.rb
data/NOTES.md DELETED
@@ -1,4 +0,0 @@
1
- - Files in text/ are preprocessed by `rake consume` and serialized
2
- - e.g. text/foo.tweets becomes consumed/foo.corpus
3
- - `rake consume` looks at hashes to know which it needs to update
4
- - Preprocessed corpus files are loaded at runtime by Corpus.load('foo')
@@ -1,82 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # encoding: utf-8
3
-
4
- require 'twitter'
5
-
6
module Ebooks
  # Downloads a user's tweets into a plain-text corpus file: one tweet per
  # line, with each fetched batch preceded by a "# <id>" line holding the id
  # of the first tweet of that batch.
  class Archiver
    # @param username [String] account whose timeline is fetched
    # @param outpath [String] corpus file to read and rewrite
    def initialize(username, outpath)
      @username = username
      @outpath = outpath
      @client = Twitter::Client.new
    end

    # Read existing corpus into memory.
    # Return list of tweet lines and the last tweet id.
    def read_corpus
      lines = []
      since_id = nil

      if File.exist?(@outpath)
        lines = File.read(@outpath).split("\n")
        # An empty corpus file has no first line to inspect.
        header = lines.first
        since_id = header.split('# ').last if header && header.start_with?('#')
      end

      [lines, since_id]
    end

    # Retrieve all available tweets for a given user since the last tweet id
    def tweets_since(since_id)
      tweets = []
      max_id = nil

      opts = {
        count: 200,
        include_rts: false,
        trim_user: true
      }

      opts[:since_id] = since_id unless since_id.nil?

      loop do
        opts[:max_id] = max_id unless max_id.nil?
        page = @client.user_timeline(@username, opts)
        break if page.empty?
        puts "Received #{page.length} tweets"
        tweets += page
        # max_id is inclusive, so step below the last id received to keep the
        # next page from starting with a tweet that was already collected.
        max_id = page.last.id - 1
      end

      tweets
    end

    # Fetches the tweets newer than the id recorded on the corpus's first
    # line and rewrites the corpus with a new id line and the new tweets in
    # front of the previous content. Does nothing beyond printing a message
    # when there are no new tweets.
    def fetch_tweets
      lines, since_id = read_corpus

      if since_id.nil?
        puts "Retrieving tweets from @#{@username}"
      else
        puts "Retrieving tweets from @#{@username} since #{since_id}"
      end

      tweets = tweets_since(since_id)

      if tweets.length == 0
        puts "No new tweets"
        return
      end

      # The corpus is line-oriented, so newlines inside a tweet are flattened.
      new_lines = tweets.map { |tweet| tweet.text.gsub("\n", " ") }
      new_since_id = tweets[0].id.to_s
      lines = ["# " + new_since_id] + new_lines + lines
      # Block form closes the file even if the write raises.
      File.open(@outpath, 'w') do |corpus|
        corpus.write(lines.join("\n"))
      end
    end
  end
end