twitter_ebooks 2.1.2 → 2.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +0 -0
- data/Gemfile +0 -0
- data/Gemfile.lock +0 -0
- data/LICENSE +0 -0
- data/README.md +1 -1
- data/Rakefile +0 -0
- data/bin/ebooks +28 -1
- data/data/adjectives.txt +0 -0
- data/data/nouns.txt +0 -0
- data/data/stopwords.txt +0 -0
- data/lib/twitter_ebooks/archive.rb +93 -0
- data/lib/twitter_ebooks/bot.rb +0 -0
- data/lib/twitter_ebooks/markov.rb +0 -0
- data/lib/twitter_ebooks/model.rb +0 -0
- data/lib/twitter_ebooks/nlp.rb +0 -0
- data/lib/twitter_ebooks/suffix.rb +0 -0
- data/lib/twitter_ebooks/version.rb +1 -1
- data/lib/twitter_ebooks.rb +1 -1
- data/script/process_anc_data.rb +0 -0
- data/skeleton/.gitignore +0 -0
- data/skeleton/Procfile +0 -0
- data/skeleton/bots.rb +0 -0
- data/skeleton/run.rb +0 -0
- data/test/corpus/0xabad1dea.tweets +0 -0
- data/test/keywords.rb +0 -0
- data/test/tokenize.rb +0 -0
- data/twitter_ebooks.gemspec +0 -0
- metadata +3 -4
- data/NOTES.md +0 -4
- data/lib/twitter_ebooks/archiver.rb +0 -82
data/.gitignore
CHANGED
File without changes
|
data/Gemfile
CHANGED
File without changes
|
data/Gemfile.lock
CHANGED
File without changes
|
data/LICENSE
CHANGED
File without changes
|
data/README.md
CHANGED
data/Rakefile
CHANGED
File without changes
|
data/bin/ebooks
CHANGED
@@ -60,7 +60,7 @@ module Ebooks
|
|
60
60
|
end
|
61
61
|
|
62
62
|
def self.archive(username, outpath)
|
63
|
-
|
63
|
+
Archive.new(username, outpath).sync
|
64
64
|
end
|
65
65
|
|
66
66
|
def self.tweet(modelpath, username)
|
@@ -73,6 +73,31 @@ module Ebooks
|
|
73
73
|
bot.tweet(statement)
|
74
74
|
end
|
75
75
|
|
76
|
+
def self.jsonify(old_path, new_path)
|
77
|
+
name = File.basename(old_path).split('.')[0]
|
78
|
+
new_path ||= name + ".json"
|
79
|
+
|
80
|
+
tweets = []
|
81
|
+
id = nil
|
82
|
+
File.read(old_path).split("\n").each do |l|
|
83
|
+
if l.start_with?('# ')
|
84
|
+
id = l.split('# ')[-1]
|
85
|
+
else
|
86
|
+
tweet = { text: l }
|
87
|
+
if id
|
88
|
+
tweet[:id] = id
|
89
|
+
id = nil
|
90
|
+
end
|
91
|
+
tweets << tweet
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
File.open(new_path, 'w') do |f|
|
96
|
+
log "Writing #{tweets.length} tweets to #{new_path}"
|
97
|
+
f.write(JSON.pretty_generate(tweets))
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
76
101
|
def self.command(args)
|
77
102
|
usage = """Usage:
|
78
103
|
ebooks new <reponame>
|
@@ -81,6 +106,7 @@ module Ebooks
|
|
81
106
|
ebooks score <model_path> <input>
|
82
107
|
ebooks archive <@user> <outpath>
|
83
108
|
ebooks tweet <model_path> <@bot>
|
109
|
+
ebooks jsonify <old_corpus_path> [new_corpus_path]
|
84
110
|
"""
|
85
111
|
|
86
112
|
if args.length == 0
|
@@ -95,6 +121,7 @@ module Ebooks
|
|
95
121
|
when "score" then score(args[1], args[2..-1].join(' '))
|
96
122
|
when "archive" then archive(args[1], args[2])
|
97
123
|
when "tweet" then tweet(args[1], args[2])
|
124
|
+
when "jsonify" then jsonify(args[1], args[2])
|
98
125
|
end
|
99
126
|
end
|
100
127
|
end
|
data/data/adjectives.txt
CHANGED
File without changes
|
data/data/nouns.txt
CHANGED
File without changes
|
data/data/stopwords.txt
CHANGED
File without changes
|
data/lib/twitter_ebooks/archive.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'twitter'
|
5
|
+
require 'json'
|
6
|
+
|
7
|
+
CONFIG_PATH = "/home/#{ENV['USER']}/.ebooksrc"
|
8
|
+
|
9
|
+
module Ebooks
|
10
|
+
class Archive
|
11
|
+
attr_reader :tweets
|
12
|
+
|
13
|
+
def make_client
|
14
|
+
if File.exists?(CONFIG_PATH)
|
15
|
+
@config = JSON.parse(File.read(CONFIG_PATH), symbolize_names: true)
|
16
|
+
else
|
17
|
+
@config = {}
|
18
|
+
|
19
|
+
puts "As Twitter no longer allows anonymous API access, you'll need to enter the auth details of any account to use for archiving. These will be stored in #{CONFIG_PATH} if you need to change them later."
|
20
|
+
print "Consumer key: "
|
21
|
+
@config[:consumer_key] = STDIN.gets.chomp
|
22
|
+
print "Consumer secret: "
|
23
|
+
@config[:consumer_secret] = STDIN.gets.chomp
|
24
|
+
print "Oauth token: "
|
25
|
+
@config[:oauth_token] = STDIN.gets.chomp
|
26
|
+
print "Oauth secret: "
|
27
|
+
@config[:oauth_token_secret] = STDIN.gets.chomp
|
28
|
+
|
29
|
+
File.open(CONFIG_PATH, 'w') do |f|
|
30
|
+
f.write(JSON.pretty_generate(@config))
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
Twitter.configure do |config|
|
35
|
+
config.consumer_key = @config[:consumer_key]
|
36
|
+
config.consumer_secret = @config[:consumer_secret]
|
37
|
+
config.oauth_token = @config[:oauth_token]
|
38
|
+
config.oauth_token_secret = @config[:oauth_token_secret]
|
39
|
+
end
|
40
|
+
|
41
|
+
Twitter::Client.new
|
42
|
+
end
|
43
|
+
|
44
|
+
def initialize(username, path, client=nil)
|
45
|
+
@username = username
|
46
|
+
@path = path || "#{username}.json"
|
47
|
+
@client = client || make_client
|
48
|
+
|
49
|
+
if File.exists?(@path)
|
50
|
+
@tweets = JSON.parse(File.read(@path), symbolize_names: true)
|
51
|
+
log "Currently #{@tweets.length} tweets for #{@username}"
|
52
|
+
else
|
53
|
+
@tweets.nil?
|
54
|
+
log "New archive for @#{username} at #{@path}"
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def sync
|
59
|
+
retries = 0
|
60
|
+
tweets = []
|
61
|
+
max_id = nil
|
62
|
+
|
63
|
+
opts = {
|
64
|
+
count: 200,
|
65
|
+
#include_rts: false,
|
66
|
+
trim_user: true
|
67
|
+
}
|
68
|
+
|
69
|
+
opts[:since_id] = @tweets[0][:id] unless @tweets.nil?
|
70
|
+
|
71
|
+
loop do
|
72
|
+
opts[:max_id] = max_id unless max_id.nil?
|
73
|
+
new = @client.user_timeline(@username, opts)
|
74
|
+
break if new.length <= 1
|
75
|
+
tweets += new
|
76
|
+
puts "Received #{tweets.length} new tweets"
|
77
|
+
max_id = new.last.id
|
78
|
+
end
|
79
|
+
|
80
|
+
if tweets.length == 0
|
81
|
+
log "No new tweets"
|
82
|
+
else
|
83
|
+
@tweets ||= []
|
84
|
+
@tweets = tweets.map(&:attrs).each { |tw|
|
85
|
+
tw.delete(:entities)
|
86
|
+
} + @tweets
|
87
|
+
File.open(@path, 'w') do |f|
|
88
|
+
f.write(JSON.pretty_generate(@tweets))
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
data/lib/twitter_ebooks/bot.rb
CHANGED
File without changes
|
File without changes
|
data/lib/twitter_ebooks/model.rb
CHANGED
File without changes
|
data/lib/twitter_ebooks/nlp.rb
CHANGED
File without changes
|
File without changes
|
data/lib/twitter_ebooks.rb
CHANGED
data/script/process_anc_data.rb
CHANGED
File without changes
|
data/skeleton/.gitignore
CHANGED
File without changes
|
data/skeleton/Procfile
CHANGED
File without changes
|
data/skeleton/bots.rb
CHANGED
File without changes
|
data/skeleton/run.rb
CHANGED
File without changes
|
File without changes
|
data/test/keywords.rb
CHANGED
File without changes
|
data/test/tokenize.rb
CHANGED
File without changes
|
data/twitter_ebooks.gemspec
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_ebooks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.2
|
4
|
+
version: 2.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: minitest
|
@@ -167,7 +167,6 @@ files:
|
|
167
167
|
- Gemfile
|
168
168
|
- Gemfile.lock
|
169
169
|
- LICENSE
|
170
|
-
- NOTES.md
|
171
170
|
- README.md
|
172
171
|
- Rakefile
|
173
172
|
- bin/ebooks
|
@@ -175,7 +174,7 @@ files:
|
|
175
174
|
- data/nouns.txt
|
176
175
|
- data/stopwords.txt
|
177
176
|
- lib/twitter_ebooks.rb
|
178
|
-
- lib/twitter_ebooks/archiver.rb
|
177
|
+
- lib/twitter_ebooks/archive.rb
|
179
178
|
- lib/twitter_ebooks/bot.rb
|
180
179
|
- lib/twitter_ebooks/markov.rb
|
181
180
|
- lib/twitter_ebooks/model.rb
|
data/NOTES.md
DELETED
data/lib/twitter_ebooks/archiver.rb
DELETED
@@ -1,82 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# encoding: utf-8
|
3
|
-
|
4
|
-
require 'twitter'
|
5
|
-
|
6
|
-
module Ebooks
|
7
|
-
class Archiver
|
8
|
-
def initialize(username, outpath)
|
9
|
-
@username = username
|
10
|
-
@outpath = outpath
|
11
|
-
@client = Twitter::Client.new
|
12
|
-
end
|
13
|
-
|
14
|
-
# Read exiting corpus into memory.
|
15
|
-
# Return list of tweet lines and the last tweet id.
|
16
|
-
def read_corpus
|
17
|
-
lines = []
|
18
|
-
since_id = nil
|
19
|
-
|
20
|
-
if File.exists?(@outpath)
|
21
|
-
lines = File.read(@outpath).split("\n")
|
22
|
-
if lines[0].start_with?('#')
|
23
|
-
since_id = lines[0].split('# ').last
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
[lines, since_id]
|
28
|
-
end
|
29
|
-
|
30
|
-
# Retrieve all available tweets for a given user since the last tweet id
|
31
|
-
def tweets_since(since_id)
|
32
|
-
page = 1
|
33
|
-
retries = 0
|
34
|
-
tweets = []
|
35
|
-
max_id = nil
|
36
|
-
|
37
|
-
opts = {
|
38
|
-
count: 200,
|
39
|
-
include_rts: false,
|
40
|
-
trim_user: true
|
41
|
-
}
|
42
|
-
|
43
|
-
opts[:since_id] = since_id unless since_id.nil?
|
44
|
-
|
45
|
-
loop do
|
46
|
-
opts[:max_id] = max_id unless max_id.nil?
|
47
|
-
new = @client.user_timeline(@username, opts)
|
48
|
-
break if new.length <= 1
|
49
|
-
puts "Received #{new.length} tweets"
|
50
|
-
tweets += new
|
51
|
-
max_id = new.last.id
|
52
|
-
break
|
53
|
-
end
|
54
|
-
|
55
|
-
tweets
|
56
|
-
end
|
57
|
-
|
58
|
-
def fetch_tweets
|
59
|
-
lines, since_id = read_corpus
|
60
|
-
|
61
|
-
if since_id.nil?
|
62
|
-
puts "Retrieving tweets from @#{@username}"
|
63
|
-
else
|
64
|
-
puts "Retrieving tweets from @#{@username} since #{since_id}"
|
65
|
-
end
|
66
|
-
|
67
|
-
tweets = tweets_since(since_id)
|
68
|
-
|
69
|
-
if tweets.length == 0
|
70
|
-
puts "No new tweets"
|
71
|
-
return
|
72
|
-
end
|
73
|
-
|
74
|
-
new_lines = tweets.map { |tweet| tweet.text.gsub("\n", " ") }
|
75
|
-
new_since_id = tweets[0].id.to_s
|
76
|
-
lines = ["# " + new_since_id] + new_lines + lines
|
77
|
-
corpus = File.open(@outpath, 'w')
|
78
|
-
corpus.write(lines.join("\n"))
|
79
|
-
corpus.close
|
80
|
-
end
|
81
|
-
end
|
82
|
-
end
|