rbitter 0.1.2-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/.rspec +2 -0
- data/.travis.yml +15 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +72 -0
- data/Rakefile +8 -0
- data/XMLRPC.md +19 -0
- data/bin/rbitter +20 -0
- data/lib/rbitter.rb +86 -0
- data/lib/rbitter/arcserver.rb +130 -0
- data/lib/rbitter/console.rb +93 -0
- data/lib/rbitter/default/config_json.rb +37 -0
- data/lib/rbitter/dlthread.rb +63 -0
- data/lib/rbitter/env.rb +62 -0
- data/lib/rbitter/libtwitter_connection_override.rb +46 -0
- data/lib/rbitter/records.rb +121 -0
- data/lib/rbitter/records_migrate/.keep +0 -0
- data/lib/rbitter/records_migrate/20150327_add_index.rb +12 -0
- data/lib/rbitter/records_migrate/20150504_add_replyto_column.rb +12 -0
- data/lib/rbitter/streaming.rb +105 -0
- data/lib/rbitter/version.rb +20 -0
- data/lib/rbitter/xmlrpc.rb +4 -0
- data/lib/rbitter/xmlrpcd/base.rb +25 -0
- data/lib/rbitter/xmlrpcd/rpchandles.rb +12 -0
- data/lib/rbitter/xmlrpcd/xmlrpc_auth_server.rb +83 -0
- data/lib/rbitter/xmlrpcd/xmlrpcd.rb +69 -0
- data/rbitter.gemspec +46 -0
- data/spec/config/.keep +0 -0
- data/spec/config/default.json +33 -0
- data/spec/rbitter/arcserver_spec.rb +30 -0
- data/spec/rbitter/console_spec.rb +9 -0
- data/spec/rbitter/default/config_json_spec.rb +3 -0
- data/spec/rbitter/dlthread_spec.rb +8 -0
- data/spec/rbitter/env_spec.rb +62 -0
- data/spec/rbitter/libtwitter_connection_override_spec.rb +8 -0
- data/spec/rbitter/records_spec.rb +13 -0
- data/spec/rbitter/streaming_spec.rb +9 -0
- data/spec/rbitter/version_spec.rb +8 -0
- data/spec/rbitter/xmlrpc_spec.rb +8 -0
- data/spec/rbitter/xmlrpcd/base_spec.rb +29 -0
- data/spec/rbitter/xmlrpcd/rpchandles_spec.rb +10 -0
- data/spec/rbitter/xmlrpcd/xmlrpc_auth_server_spec.rb +8 -0
- data/spec/rbitter/xmlrpcd/xmlrpcd_spec.rb +9 -0
- data/spec/rbitter_spec.rb +42 -0
- data/spec/sample_data/.keep +0 -0
- data/spec/spec_helper.rb +39 -0
- metadata +265 -0
@@ -0,0 +1,37 @@
|
|
1
|
+
module Rbitter
  # Template configuration, as JSON text.
  #
  # Sections: Twitter OAuth tokens ("twitter"), the ActiveRecord backend
  # selector ("activerecord") with one settings section per backend
  # ("sqlite3", "mysql2"), media download options ("media_downloader") and
  # the XMLRPC server options ("xmlrpc").
  #
  # The string is frozen so that no caller can accidentally edit the shared
  # template in place; use +dup+ to obtain a modifiable copy.
  DEFAULT_CONFIG_JSON = <<-ENDOFJSON.freeze
{
"twitter": {
"consumer_key": "",
"consumer_secret": "",
"access_token": "",
"access_token_secret": ""
},
"activerecord": "sqlite3",
"sqlite3": {
"dbfile": "rbitter.sqlite"
},
"mysql2": {
"host": "localhost",
"port": 3306,
"dbname": "archive",
"username": "",
"password": ""
},
"media_downloader": {
"large_image": true,
"download_dir": "imgs/"
},
"xmlrpc": {
"enable": true,
"bind_host": "0.0.0.0",
"bind_port": 1400,
"auth": {
"username": "username",
"password": "password"
},
"handles": ["/path/to/handles"]
}
}
  ENDOFJSON
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require "net/http"
|
4
|
+
require "openssl"
|
5
|
+
|
6
|
+
module Rbitter
|
7
|
+
# Downloads batches of URLs in background threads and stores each response
# body as a file inside a destination directory.
class DLThread
  # dlfolder   - directory that receives the downloaded files. When it is
  #              nil or not an existing directory, the current directory
  #              ("./") is used instead and a warning is printed.
  # large_flag - when truthy, ":large" is appended to every URL before it is
  #              requested. nil is treated as false.
  def initialize(dlfolder, large_flag)
    @dest = dlfolder
    unless dlfolder && File.directory?(dlfolder)
      warn "[dlthread] Given download location is not available for downloading."
      warn "[dlthread] Fallback to current directory."
      @dest = "./"
    end

    @large_image = large_flag.nil? ? false : large_flag
    @pool = []
  end

  # Starts one background thread that downloads every URL of url_array in
  # order. A failure on one URL is reported with +warn+ and does not stop
  # the remaining URLs of the batch.
  #
  # Returns the internal array of tracked threads.
  def <<(url_array)
    # Forget threads that already finished, so the pool does not grow
    # without bound between two job_cleanup calls.
    @pool.keep_if(&:alive?)

    download_task = Thread.new do
      url_array.each do |url|
        begin
          fetch(url)
        rescue StandardError => e
          warn "[dlthread] Failed to download #{url}: #{e.class}: #{e.message}"
        end
      end
    end

    @pool.push download_task
  end

  # Empties the pool. Threads that are still running are terminated and
  # joined; downloads in progress are therefore abandoned.
  def job_cleanup
    until @pool.empty?
      dlthrd = @pool.shift
      next unless dlthrd.alive?

      puts "[dlthread] Thread forceful cleaning up [remains: #{@pool.length}]"
      dlthrd.terminate
      dlthrd.join
    end
  end

  private

  # Requests a single URL and, on a 200 OK response, streams the body to
  # <dest>/<basename of the original url>. Other responses are ignored.
  def fetch(url)
    uri = URI.parse(@large_image ? url + ":large" : url)
    ssl = uri.scheme.downcase == 'https'

    Net::HTTP.start(uri.host, uri.port, :use_ssl => ssl) do |http|
      http.request(Net::HTTP::Get.new(uri.request_uri)) do |res|
        next unless res.is_a?(Net::HTTPOK)

        # The file name comes from the URL as given, i.e. without the
        # ":large" suffix.
        fname = File.basename(url)
        puts "[fetch] remote: #{uri.path} => local: #{fname}"

        # File.open (not Kernel#open) so the path is never treated as a command.
        File.open(File.join(@dest, fname), "wb") do |file|
          res.read_body { |chunk| file.write(chunk) }
        end
      end
    end
  end
end
|
63
|
+
end
|
data/lib/rbitter/env.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
|
5
|
+
module Rbitter
  # Parsed configuration shared by the whole module.
  @@env = Hash.new

  # Raised when no usable configuration file can be loaded.
  class ConfigFileError < StandardError; end

  # Shortcut for reading one top-level configuration entry.
  def self.[](k)
    @@env[k]
  end

  module_function

  # Returns the currently loaded configuration hash.
  def env
    @@env
  end

  # Empties the currently loaded configuration hash in place.
  def env_reset
    @@env.clear
  end

  # Tells whether the loaded configuration is acceptable.
  # No validation is implemented yet, so this always answers true.
  def env_validate?
    # TODO: Add validator
    true
  end

  # Loads the configuration and returns the resulting hash.
  #
  # json_path - path of a JSON file to load. When nil, the default
  #             locations are tried in order:
  #               1. (current_dir)/config.json
  #               2. (current_dir)/.rbitter/config.json
  #
  # Raises ConfigFileError when json_path cannot be read, parsed or
  # validated, or when none of the default locations yields a configuration.
  # NOTE(review): a malformed file in a default location still surfaces as
  # the JSON parser's own error, as before; confirm whether callers expect
  # ConfigFileError there too.
  def config_initialize(json_path = nil)
    env_reset

    unless json_path.nil?
      begin
        # File.read instead of Kernel#open: a path beginning with "|"
        # must never be executed as a command.
        @@env = JSON.parse(File.read(json_path))

        return @@env if env_validate?
        fail StandardError, "Invalid configuration"
      rescue => e
        fail ConfigFileError, "Load Failure (#{json_path}): #{e.to_s}"
      end
    end

    locations = ["config.json", ".rbitter/config.json"]
    locations.collect! { |base| File.join(Dir.pwd, base) }

    locations.each do |location|
      next unless File.file?(location)

      @@env = JSON.parse(File.read(location))
      break if env_validate?

      # Do not keep a rejected configuration around.
      env_reset
    end

    if @@env.empty?
      fail ConfigFileError, "Can not load any configuration in [#{locations.join(', ')}]"
    end

    @@env
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'http/parser'
|
4
|
+
require 'openssl'
|
5
|
+
require 'resolv'
|
6
|
+
|
7
|
+
module Twitter
|
8
|
+
module Streaming
|
9
|
+
# Streaming connection that reads with non-blocking I/O and gives up when
# the server stays silent for too long.
class Connection
  MODIFIED = true

  # Seconds to wait for socket activity before the stream is considered
  # stalled. The reason for 90 seconds is documented on:
  # https://dev.twitter.com/streaming/overview/connecting
  STALL_TIMEOUT = 90

  attr_reader :tcp_socket_class, :ssl_socket_class

  # options - may carry :tcp_socket_class and :ssl_socket_class to replace
  #           the default TCPSocket / OpenSSL::SSL::SSLSocket classes.
  def initialize(options = {})
    @tcp_socket_class = options.fetch(:tcp_socket_class) { TCPSocket }
    @ssl_socket_class = options.fetch(:ssl_socket_class) { OpenSSL::SSL::SSLSocket }
  end

  # Opens an SSL connection to request.uri, sends the request over it and
  # then appends every received chunk to response.
  #
  # This method does not return normally: it ends with the exception that
  # terminates the read loop (for example EOFError), or with
  # Twitter::Error::ServerError when the connection stalls. Both sockets
  # are closed before the exception leaves this method.
  def stream(request, response)
    client = nil
    ssl_client = nil

    client_context = OpenSSL::SSL::SSLContext.new
    client = @tcp_socket_class.new(Resolv.getaddress(request.uri.host), request.uri.port)
    ssl_client = @ssl_socket_class.new(client, client_context)
    ssl_client.connect
    request.stream(ssl_client)

    loop do
      begin
        response << ssl_client.read_nonblock(1024)
      rescue IO::WaitReadable
        wait_for_socket([ssl_client], [])
      rescue IO::WaitWritable
        # An SSL read may need to write first (e.g. during renegotiation).
        wait_for_socket([], [ssl_client])
      end
    end
  ensure
    # Closing the SSL socket does not necessarily close the TCP socket
    # underneath, so both are released explicitly.
    close_quietly(ssl_client)
    close_quietly(client)
  end

  private

  # Blocks until one of the given sockets is ready, or raises when nothing
  # happens within STALL_TIMEOUT seconds.
  def wait_for_socket(readers, writers)
    return if IO.select(readers, writers, [], STALL_TIMEOUT)

    raise Twitter::Error::ServerError, "Connection stalled"
  end

  # Best-effort close used during cleanup; errors raised while closing are
  # dropped so they cannot replace the exception that ended the stream.
  def close_quietly(io)
    return if io.nil?
    return if io.respond_to?(:closed?) && io.closed?

    io.close
  rescue StandardError
    nil
  end
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require "active_record"
|
4
|
+
require "date"
|
5
|
+
|
6
|
+
module Rbitter
|
7
|
+
# ActiveRecord model for one archived entry. It declares no behaviour of
# its own; everything comes from ActiveRecord::Base.
class Record < ActiveRecord::Base; end
|
9
|
+
end
|
10
|
+
|
11
|
+
# Database helpers: connecting ActiveRecord according to the Rbitter
# configuration, creating/migrating the `records` table and exporting it.
module ARSupport
  SCHEME_VERSION = 20150504
  SCHEME = {
    :marker => :integer, # 0 normal, 1 begin 2 halt
    :marker_msg => :string,
    :userid => :integer,
    :username => :string,
    :tweetid => :integer,
    :replyto => :integer,
    :tweet => :text, # with url unpacked
    :date => :datetime,
    :rt_count => :integer,
    :fav_count => :integer
  }

  module_function

  # True when the `records` table exists on the current connection.
  def prepared?
    ActiveRecord::Base.connection.table_exists?(:records)
  end

  # Establishes the ActiveRecord connection selected by the 'activerecord'
  # configuration value ('sqlite3' or 'mysql2'). On JRuby the matching JDBC
  # driver is loaded first and the JDBC adapter is used.
  #
  # Raises RuntimeError for any other 'activerecord' value.
  def connect_database
    on_java = RUBY_PLATFORM == 'java'

    case Rbitter['activerecord']
    when 'sqlite3'
      warn "Warning: If you enable XMLRPC access, using sqlite is not recommended."
      warn "Warning: Random crash can happen because of concurrency."

      if on_java
        require "jdbc/sqlite3"
        Jdbc::SQLite3.load_driver
      end

      ActiveRecord::Base.establish_connection(
        adapter: (on_java ? 'jdbcsqlite3' : 'sqlite3'),
        database: Rbitter['sqlite3']['dbfile'],
        timeout: 10000) # Long timeout for slow computer
    when 'mysql2'
      if on_java
        # Loaded lazily, like the sqlite driver, because it only exists on JRuby.
        require "jdbc/mysql"
        Jdbc::MySQL.load_driver
      end

      ActiveRecord::Base.establish_connection(
        adapter: (on_java ? 'jdbcmysql' : 'mysql2'),
        host: Rbitter['mysql2']['host'],
        port: Rbitter['mysql2']['port'],
        database: Rbitter['mysql2']['dbname'],
        username: Rbitter['mysql2']['username'],
        password: Rbitter['mysql2']['password'],
        encoding: "utf8mb4",
        collation: "utf8mb4_unicode_ci")
    else
      raise RuntimeError, "Unknown configuration value. 'activerecord' value should be sqlite3 or mysql2."
    end
  end

  # Runs the migrations in records_migrate/ (next to this file) when the
  # database reports a version older than SCHEME_VERSION.
  def update_database_scheme
    current_version = ActiveRecord::Migrator.current_version
    if current_version < SCHEME_VERSION
      warn "[records] Your ActiveRecord scheme is outdated."
      warn "[records] Migrate... #{current_version} => #{SCHEME_VERSION}"
      ActiveRecord::Migrator.migrate(File.expand_path("../records_migrate", __FILE__), SCHEME_VERSION)
    end
  end

  # Creates the `records` table with one column per SCHEME entry, plus an
  # index on tweetid.
  #
  # option_string - passed to create_table as :options.
  def prepare option_string=""
    ActiveRecord::Schema.define(version: SCHEME_VERSION) {
      # MySQL specific option_string:
      # utf8mb4 -> supporting UTF-8 4-byte characters (i.e. Emoji)
      create_table(:records, { :options => option_string }) do |t|
        SCHEME.each do |column, type|
          case type
          when :string
            t.string column
          when :integer
            t.integer column, :limit => 8
          when :datetime
            t.datetime column
          when :text
            t.text column
          else
            puts "Unexpected column type '#{type}' of #{column}"
          end
        end
      end

      add_index :records, :tweetid
    }
  end

  # Formats obj as "YYYY-MM-DD HH:MM:SS".
  #
  # obj - a String (parsed with DateTime.parse), a DateTime or a Time.
  #
  # Raises ArgumentError for any other kind of object.
  def any_to_datestring(obj)
    if obj.is_a?(String)
      # try to parse it
      DateTime.parse(obj).strftime("%Y-%m-%d %H:%M:%S")
    elsif obj.is_a?(DateTime) || obj.is_a?(Time)
      obj.strftime("%Y-%m-%d %H:%M:%S")
    else
      raise ArgumentError.new("Can\'t automatically extract DateTime info")
    end
  end

  # Writes every Rbitter::Record to csvfile: a header line naming the
  # SCHEME columns, then one line per record in the same column order.
  def export_to_csv(csvfile)
    columns = SCHEME.keys

    File.open(csvfile, 'w') do |f|
      f.write(columns.join(","))
      f.write("\n")
      Rbitter::Record.find_each do |t|
        f.write(columns.map { |column| csv_field(t.public_send(column)) }.join(","))
        f.write("\n")
      end
    end
  end

  # Renders one value as a CSV field. Values containing a comma, a double
  # quote or a line break are wrapped in double quotes, with embedded
  # quotes doubled; all other values are written as-is.
  def csv_field(value)
    text = value.to_s
    return text unless text =~ /[",\r\n]/

    "\"#{text.gsub('"', '""')}\""
  end
end
|
File without changes
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'twitter'
|
4
|
+
|
5
|
+
module Rbitter
|
6
|
+
# Stand-in for StreamClient that needs no network access: it hands one
# fixed, hard-coded tweet hash to the block given to #run.
class DummyStreamClient
  # tokens is accepted for interface compatibility and ignored.
  def initialize(tokens); end

  # Yields every canned tweet to the given block.
  def run(&operation_block)
    internal(&operation_block)
  end

  private

  def internal(&operation_block)
    canned_tweets.each do |entry|
      yield entry
    end
  end

  # The fixed sample data delivered by this client.
  def canned_tweets
    sample = {
      "tweetid" => 1,
      "userid" => 1,
      "replyto" => nil,
      "tweet" => "test",
      "rt_count" => 0,
      "fav_count" => 0,
      "screen_name" => "twitter",
      "date" => "2015-01-01 12:11:10",
      "media_urls" => ["https://pbs.twimg.com/media/CEPWFtgUgAEmbcV.png"],
      "web_urls" => ["https://www.google.com/"]
    }

    [sample]
  end
end
|
33
|
+
|
34
|
+
# Reads the user stream through Twitter::Streaming::Client and converts
# every received tweet into a plain hash.
class StreamClient
  # tokens - hash with the string keys 'consumer_key', 'consumer_secret',
  #          'access_token' and 'access_token_secret'.
  def initialize(tokens)
    @t = Twitter::Streaming::Client.new do |object|
      object.consumer_key = tokens['consumer_key']
      object.consumer_secret = tokens['consumer_secret']
      object.access_token = tokens['access_token']
      object.access_token_secret = tokens['access_token_secret']
    end
  end

  # Streams tweets to the given block, one hash per tweet (see
  # #build_record for the keys). When the stream ends with EOFError it
  # waits three seconds and connects again, without any retry limit; other
  # exceptions propagate to the caller.
  def run(&operation_block)
    begin
      internal(&operation_block)
    rescue EOFError
      puts "Network unreachable. Retry in 3 seconds..."
      sleep 3
      retry
    end
  end

  private

  # Yields one record per streamed Twitter::Tweet. Other stream objects are
  # skipped, and a retweet is replaced by the tweet it retweets.
  def internal(&operation_block)
    @t.user do |tweet|
      next unless tweet.is_a?(Twitter::Tweet)

      tweet = tweet.retweeted_tweet if tweet.retweet?
      yield build_record(tweet)
    end
  end

  # Builds the hash handed to the caller's block.
  def build_record(tweet)
    text, media_urls, web_urls = unpack_entities(tweet)

    {
      "tweetid" => tweet.id,
      "userid" => tweet.user.id,
      "replyto" => tweet.in_reply_to_status_id? ? tweet.in_reply_to_status_id : nil,
      "tweet" => text,
      "rt_count" => tweet.retweet_count,
      "fav_count" => tweet.favorite_count,
      "screen_name" => tweet.user.screen_name,
      "date" => tweet.created_at,
      "media_urls" => media_urls,
      "web_urls" => web_urls
    }
  end

  # Returns [text, media_urls, web_urls] for a tweet.
  #
  # The text has its line breaks removed, media links replaced by their
  # display URL, the media file URLs appended, and shortened links replaced
  # by their expanded URL.
  def unpack_entities(tweet)
    text = tweet.full_text.gsub(/(\r\n|\n)/, '')
    media_urls = []
    web_urls = []

    return [text, media_urls, web_urls] unless tweet.entities?

    if tweet.media?
      tweet.media.each do |media|
        media_urls.push(media.media_uri_https.to_s)
        text.gsub!(media.url.to_s, media.display_url.to_s)
      end
    end

    # Only add the separator when there is something to append; otherwise
    # the stored text would end with a stray space.
    text = "#{text} #{media_urls.join(' ')}" unless media_urls.empty?

    if tweet.uris?
      tweet.uris.each do |uri|
        web_urls.push(uri.expanded_url.to_s)
        text.gsub!(uri.url.to_s, uri.expanded_url.to_s)
      end
    end

    [text, media_urls, web_urls]
  end
end
|
105
|
+
end
|