twitter_ebooks 2.1.2 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +0 -0
- data/Gemfile +0 -0
- data/Gemfile.lock +0 -0
- data/LICENSE +0 -0
- data/README.md +1 -1
- data/Rakefile +0 -0
- data/bin/ebooks +28 -1
- data/data/adjectives.txt +0 -0
- data/data/nouns.txt +0 -0
- data/data/stopwords.txt +0 -0
- data/lib/twitter_ebooks/archive.rb +93 -0
- data/lib/twitter_ebooks/bot.rb +0 -0
- data/lib/twitter_ebooks/markov.rb +0 -0
- data/lib/twitter_ebooks/model.rb +0 -0
- data/lib/twitter_ebooks/nlp.rb +0 -0
- data/lib/twitter_ebooks/suffix.rb +0 -0
- data/lib/twitter_ebooks/version.rb +1 -1
- data/lib/twitter_ebooks.rb +1 -1
- data/script/process_anc_data.rb +0 -0
- data/skeleton/.gitignore +0 -0
- data/skeleton/Procfile +0 -0
- data/skeleton/bots.rb +0 -0
- data/skeleton/run.rb +0 -0
- data/test/corpus/0xabad1dea.tweets +0 -0
- data/test/keywords.rb +0 -0
- data/test/tokenize.rb +0 -0
- data/twitter_ebooks.gemspec +0 -0
- metadata +3 -4
- data/NOTES.md +0 -4
- data/lib/twitter_ebooks/archiver.rb +0 -82
data/.gitignore
CHANGED
File without changes
|
data/Gemfile
CHANGED
File without changes
|
data/Gemfile.lock
CHANGED
File without changes
|
data/LICENSE
CHANGED
File without changes
|
data/README.md
CHANGED
data/Rakefile
CHANGED
File without changes
|
data/bin/ebooks
CHANGED
@@ -60,7 +60,7 @@ module Ebooks
|
|
60
60
|
end
|
61
61
|
|
62
62
|
def self.archive(username, outpath)
|
63
|
-
|
63
|
+
Archive.new(username, outpath).sync
|
64
64
|
end
|
65
65
|
|
66
66
|
def self.tweet(modelpath, username)
|
@@ -73,6 +73,31 @@ module Ebooks
|
|
73
73
|
bot.tweet(statement)
|
74
74
|
end
|
75
75
|
|
76
|
+
def self.jsonify(old_path, new_path)
|
77
|
+
name = File.basename(old_path).split('.')[0]
|
78
|
+
new_path ||= name + ".json"
|
79
|
+
|
80
|
+
tweets = []
|
81
|
+
id = nil
|
82
|
+
File.read(old_path).split("\n").each do |l|
|
83
|
+
if l.start_with?('# ')
|
84
|
+
id = l.split('# ')[-1]
|
85
|
+
else
|
86
|
+
tweet = { text: l }
|
87
|
+
if id
|
88
|
+
tweet[:id] = id
|
89
|
+
id = nil
|
90
|
+
end
|
91
|
+
tweets << tweet
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
File.open(new_path, 'w') do |f|
|
96
|
+
log "Writing #{tweets.length} tweets to #{new_path}"
|
97
|
+
f.write(JSON.pretty_generate(tweets))
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
76
101
|
def self.command(args)
|
77
102
|
usage = """Usage:
|
78
103
|
ebooks new <reponame>
|
@@ -81,6 +106,7 @@ module Ebooks
|
|
81
106
|
ebooks score <model_path> <input>
|
82
107
|
ebooks archive <@user> <outpath>
|
83
108
|
ebooks tweet <model_path> <@bot>
|
109
|
+
ebooks jsonify <old_corpus_path> [new_corpus_path]
|
84
110
|
"""
|
85
111
|
|
86
112
|
if args.length == 0
|
@@ -95,6 +121,7 @@ module Ebooks
|
|
95
121
|
when "score" then score(args[1], args[2..-1].join(' '))
|
96
122
|
when "archive" then archive(args[1], args[2])
|
97
123
|
when "tweet" then tweet(args[1], args[2])
|
124
|
+
when "jsonify" then jsonify(args[1], args[2])
|
98
125
|
end
|
99
126
|
end
|
100
127
|
end
|
data/data/adjectives.txt
CHANGED
File without changes
|
data/data/nouns.txt
CHANGED
File without changes
|
data/data/stopwords.txt
CHANGED
File without changes
|
data/lib/twitter_ebooks/archive.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'twitter'
|
5
|
+
require 'json'
|
6
|
+
|
7
|
+
CONFIG_PATH = "/home/#{ENV['USER']}/.ebooksrc"
|
8
|
+
|
9
|
+
module Ebooks
|
10
|
+
class Archive
|
11
|
+
attr_reader :tweets
|
12
|
+
|
13
|
+
def make_client
|
14
|
+
if File.exists?(CONFIG_PATH)
|
15
|
+
@config = JSON.parse(File.read(CONFIG_PATH), symbolize_names: true)
|
16
|
+
else
|
17
|
+
@config = {}
|
18
|
+
|
19
|
+
puts "As Twitter no longer allows anonymous API access, you'll need to enter the auth details of any account to use for archiving. These will be stored in #{CONFIG_PATH} if you need to change them later."
|
20
|
+
print "Consumer key: "
|
21
|
+
@config[:consumer_key] = STDIN.gets.chomp
|
22
|
+
print "Consumer secret: "
|
23
|
+
@config[:consumer_secret] = STDIN.gets.chomp
|
24
|
+
print "Oauth token: "
|
25
|
+
@config[:oauth_token] = STDIN.gets.chomp
|
26
|
+
print "Oauth secret: "
|
27
|
+
@config[:oauth_token_secret] = STDIN.gets.chomp
|
28
|
+
|
29
|
+
File.open(CONFIG_PATH, 'w') do |f|
|
30
|
+
f.write(JSON.pretty_generate(@config))
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
Twitter.configure do |config|
|
35
|
+
config.consumer_key = @config[:consumer_key]
|
36
|
+
config.consumer_secret = @config[:consumer_secret]
|
37
|
+
config.oauth_token = @config[:oauth_token]
|
38
|
+
config.oauth_token_secret = @config[:oauth_token_secret]
|
39
|
+
end
|
40
|
+
|
41
|
+
Twitter::Client.new
|
42
|
+
end
|
43
|
+
|
44
|
+
def initialize(username, path, client=nil)
|
45
|
+
@username = username
|
46
|
+
@path = path || "#{username}.json"
|
47
|
+
@client = client || make_client
|
48
|
+
|
49
|
+
if File.exists?(@path)
|
50
|
+
@tweets = JSON.parse(File.read(@path), symbolize_names: true)
|
51
|
+
log "Currently #{@tweets.length} tweets for #{@username}"
|
52
|
+
else
|
53
|
+
@tweets.nil?
|
54
|
+
log "New archive for @#{username} at #{@path}"
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def sync
|
59
|
+
retries = 0
|
60
|
+
tweets = []
|
61
|
+
max_id = nil
|
62
|
+
|
63
|
+
opts = {
|
64
|
+
count: 200,
|
65
|
+
#include_rts: false,
|
66
|
+
trim_user: true
|
67
|
+
}
|
68
|
+
|
69
|
+
opts[:since_id] = @tweets[0][:id] unless @tweets.nil?
|
70
|
+
|
71
|
+
loop do
|
72
|
+
opts[:max_id] = max_id unless max_id.nil?
|
73
|
+
new = @client.user_timeline(@username, opts)
|
74
|
+
break if new.length <= 1
|
75
|
+
tweets += new
|
76
|
+
puts "Received #{tweets.length} new tweets"
|
77
|
+
max_id = new.last.id
|
78
|
+
end
|
79
|
+
|
80
|
+
if tweets.length == 0
|
81
|
+
log "No new tweets"
|
82
|
+
else
|
83
|
+
@tweets ||= []
|
84
|
+
@tweets = tweets.map(&:attrs).each { |tw|
|
85
|
+
tw.delete(:entities)
|
86
|
+
} + @tweets
|
87
|
+
File.open(@path, 'w') do |f|
|
88
|
+
f.write(JSON.pretty_generate(@tweets))
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
data/lib/twitter_ebooks/bot.rb
CHANGED
File without changes
|
data/lib/twitter_ebooks/markov.rb
CHANGED
File without changes
|
data/lib/twitter_ebooks/model.rb
CHANGED
File without changes
|
data/lib/twitter_ebooks/nlp.rb
CHANGED
File without changes
|
data/lib/twitter_ebooks/suffix.rb
CHANGED
File without changes
|
data/lib/twitter_ebooks.rb
CHANGED
data/script/process_anc_data.rb
CHANGED
File without changes
|
data/skeleton/.gitignore
CHANGED
File without changes
|
data/skeleton/Procfile
CHANGED
File without changes
|
data/skeleton/bots.rb
CHANGED
File without changes
|
data/skeleton/run.rb
CHANGED
File without changes
|
data/test/corpus/0xabad1dea.tweets
CHANGED
File without changes
|
data/test/keywords.rb
CHANGED
File without changes
|
data/test/tokenize.rb
CHANGED
File without changes
|
data/twitter_ebooks.gemspec
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_ebooks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.2
|
4
|
+
version: 2.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: minitest
|
@@ -167,7 +167,6 @@ files:
|
|
167
167
|
- Gemfile
|
168
168
|
- Gemfile.lock
|
169
169
|
- LICENSE
|
170
|
-
- NOTES.md
|
171
170
|
- README.md
|
172
171
|
- Rakefile
|
173
172
|
- bin/ebooks
|
@@ -175,7 +174,7 @@ files:
|
|
175
174
|
- data/nouns.txt
|
176
175
|
- data/stopwords.txt
|
177
176
|
- lib/twitter_ebooks.rb
|
178
|
-
- lib/twitter_ebooks/archiver.rb
|
177
|
+
- lib/twitter_ebooks/archive.rb
|
179
178
|
- lib/twitter_ebooks/bot.rb
|
180
179
|
- lib/twitter_ebooks/markov.rb
|
181
180
|
- lib/twitter_ebooks/model.rb
|
data/NOTES.md
DELETED
data/lib/twitter_ebooks/archiver.rb
DELETED
@@ -1,82 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# encoding: utf-8
|
3
|
-
|
4
|
-
require 'twitter'
|
5
|
-
|
6
|
-
module Ebooks
|
7
|
-
class Archiver
|
8
|
-
def initialize(username, outpath)
|
9
|
-
@username = username
|
10
|
-
@outpath = outpath
|
11
|
-
@client = Twitter::Client.new
|
12
|
-
end
|
13
|
-
|
14
|
-
# Read exiting corpus into memory.
|
15
|
-
# Return list of tweet lines and the last tweet id.
|
16
|
-
def read_corpus
|
17
|
-
lines = []
|
18
|
-
since_id = nil
|
19
|
-
|
20
|
-
if File.exists?(@outpath)
|
21
|
-
lines = File.read(@outpath).split("\n")
|
22
|
-
if lines[0].start_with?('#')
|
23
|
-
since_id = lines[0].split('# ').last
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
[lines, since_id]
|
28
|
-
end
|
29
|
-
|
30
|
-
# Retrieve all available tweets for a given user since the last tweet id
|
31
|
-
def tweets_since(since_id)
|
32
|
-
page = 1
|
33
|
-
retries = 0
|
34
|
-
tweets = []
|
35
|
-
max_id = nil
|
36
|
-
|
37
|
-
opts = {
|
38
|
-
count: 200,
|
39
|
-
include_rts: false,
|
40
|
-
trim_user: true
|
41
|
-
}
|
42
|
-
|
43
|
-
opts[:since_id] = since_id unless since_id.nil?
|
44
|
-
|
45
|
-
loop do
|
46
|
-
opts[:max_id] = max_id unless max_id.nil?
|
47
|
-
new = @client.user_timeline(@username, opts)
|
48
|
-
break if new.length <= 1
|
49
|
-
puts "Received #{new.length} tweets"
|
50
|
-
tweets += new
|
51
|
-
max_id = new.last.id
|
52
|
-
break
|
53
|
-
end
|
54
|
-
|
55
|
-
tweets
|
56
|
-
end
|
57
|
-
|
58
|
-
def fetch_tweets
|
59
|
-
lines, since_id = read_corpus
|
60
|
-
|
61
|
-
if since_id.nil?
|
62
|
-
puts "Retrieving tweets from @#{@username}"
|
63
|
-
else
|
64
|
-
puts "Retrieving tweets from @#{@username} since #{since_id}"
|
65
|
-
end
|
66
|
-
|
67
|
-
tweets = tweets_since(since_id)
|
68
|
-
|
69
|
-
if tweets.length == 0
|
70
|
-
puts "No new tweets"
|
71
|
-
return
|
72
|
-
end
|
73
|
-
|
74
|
-
new_lines = tweets.map { |tweet| tweet.text.gsub("\n", " ") }
|
75
|
-
new_since_id = tweets[0].id.to_s
|
76
|
-
lines = ["# " + new_since_id] + new_lines + lines
|
77
|
-
corpus = File.open(@outpath, 'w')
|
78
|
-
corpus.write(lines.join("\n"))
|
79
|
-
corpus.close
|
80
|
-
end
|
81
|
-
end
|
82
|
-
end
|