so2db 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +8 -0
- data/Gemfile.lock +37 -0
- data/MIT-LICENSE +8 -0
- data/README.md +101 -0
- data/Rakefile +8 -0
- data/bin/so2pg +5 -0
- data/lib/so2db/extensions.rb +42 -0
- data/lib/so2db/formatter.rb +113 -0
- data/lib/so2db/migrations.rb +308 -0
- data/lib/so2db/models.rb +106 -0
- data/lib/so2db.rb +126 -0
- data/lib/so2pg.rb +179 -0
- data/test/test_formatter.rb +120 -0
- data/test/test_models.rb +20 -0
- data/test/test_so2db.rb +17 -0
- data/test/test_so2pg.rb +120 -0
- metadata +163 -0
data/lib/so2db/models.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2012 Chad Taylor
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
|
+
# of this software and associated documentation files (the "Software"), to deal
|
6
|
+
# in the Software without restriction, including without limitation the rights
|
7
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
# copies of the Software, and to permit persons to whom the Software is
|
9
|
+
# furnished to do so, subject to the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be included in
|
12
|
+
# all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
20
|
+
# SOFTWARE.
|
21
|
+
|
22
|
+
require 'active_record'
|
23
|
+
|
24
|
+
module SO2DB::Models
|
25
|
+
|
26
|
+
# Model for the records of the "badges" data file (see Lookup).
class Badge < ActiveRecord::Base
  class << self
    # Field names exported for this model. Lookup.get_required_attrs camelizes
    # them (e.g. :user_id becomes "UserId").
    def exported_fields
      [:id, :user_id, :name, :date]
    end
  end
end
|
31
|
+
|
32
|
+
# Model for the records of the "comments" data file (see Lookup).
class Comment < ActiveRecord::Base
  class << self
    # Field names exported for this model, as snake_case symbols.
    def exported_fields
      [
        :id, :post_id, :score, :text, :creation_date, :user_id,
        :user_display_name
      ]
    end
  end
end
|
38
|
+
|
39
|
+
# Model for the records of the "posts" data file (see Lookup).
class Post < ActiveRecord::Base
  class << self
    # Field names exported for this model, as snake_case symbols.
    def exported_fields
      [
        :id, :post_type_id, :parent_id, :accepted_answer_id,
        :creation_date, :score, :view_count, :body, :owner_user_id,
        :last_editor_user_id, :last_editor_display_name, :last_edit_date,
        :last_activity_date, :community_owned_date, :closed_date, :title,
        :tags, :answer_count, :comment_count, :favorite_count,
        :owner_display_name
      ]
    end
  end
end
|
49
|
+
|
50
|
+
# Model for the records of the "posthistory" data file (see Lookup).
class PostHistory < ActiveRecord::Base
  # The table name is set explicitly to the singular "post_history".
  self.table_name = "post_history"

  class << self
    # Field names exported for this model, as snake_case symbols. Note that
    # Lookup.get_required_attrs renders :revision_guid as "RevisionGUID".
    def exported_fields
      [
        :id, :post_history_type_id, :post_id, :revision_guid,
        :creation_date, :user_id, :user_display_name, :comment, :text,
        :close_reason_id
      ]
    end
  end
end
|
59
|
+
|
60
|
+
# Model for the records of the "users" data file (see Lookup).
class User < ActiveRecord::Base
  class << self
    # Field names exported for this model, as snake_case symbols.
    def exported_fields
      [
        :id, :reputation, :creation_date, :display_name, :email_hash,
        :last_access_date, :website_url, :location, :age, :about_me,
        :views, :up_votes, :down_votes
      ]
    end
  end
end
|
67
|
+
|
68
|
+
# Model for the records of the "votes" data file (see Lookup).
class Vote < ActiveRecord::Base
  class << self
    # Field names exported for this model, as snake_case symbols.
    def exported_fields
      [
        :id, :post_id, :vote_type_id, :creation_date, :user_id,
        :bounty_amount
      ]
    end
  end
end
|
74
|
+
|
75
|
+
# ActiveRecord model with no custom behavior. It has no entry in Lookup's
# file-name map.
class PostType < ActiveRecord::Base; end
|
77
|
+
|
78
|
+
# ActiveRecord model with no custom behavior. It has no entry in Lookup's
# file-name map.
class PostHistoryType < ActiveRecord::Base; end
|
80
|
+
|
81
|
+
# ActiveRecord model with no custom behavior. It has no entry in Lookup's
# file-name map.
class CloseReason < ActiveRecord::Base; end
|
83
|
+
|
84
|
+
# ActiveRecord model with no custom behavior. It has no entry in Lookup's
# file-name map.
class VoteType < ActiveRecord::Base; end
|
86
|
+
|
87
|
+
# Infrastructure. Do not call this from your code.
#
# Maps the base name of a data file (e.g. "badges") to the model class in
# SO2DB::Models that describes it.
class Lookup

  # File base name => model class name. A frozen constant is used instead of a
  # class variable, which Ruby shares across the whole inheritance tree.
  MODEL_NAMES = {
    "badges"      => :Badge,
    "comments"    => :Comment,
    "posthistory" => :PostHistory,
    "posts"       => :Post,
    "users"       => :User,
    "votes"       => :Vote
  }.freeze

  # Returns the model class registered for the given file base name.
  #
  # Arguments:
  #   file_name: (String) The data file's base name, e.g. "badges".
  #
  # Raises NameError when no model is registered for file_name. The error
  # class is the one the unguarded constant lookup used to raise, but the
  # message now names the offending file instead of "wrong constant name".
  def self.find_class(file_name)
    model_name = MODEL_NAMES[file_name]
    if model_name.nil?
      raise NameError, "no SO2DB model is registered for file '#{file_name}'"
    end

    SO2DB::Models.const_get(model_name)
  end

  # Returns the model's exported fields as camelized attribute names, e.g.
  # :user_id becomes "UserId". The first "Guid" in a name is rewritten to
  # "GUID", so :revision_guid becomes "RevisionGUID".
  #
  # Arguments:
  #   file_name: (String) The data file's base name, e.g. "badges".
  def self.get_required_attrs(file_name)
    find_class(file_name).exported_fields.map do |field|
      field.to_s.camelize.sub(/Guid/, 'GUID')
    end
  end

end
|
105
|
+
|
106
|
+
end
|
data/lib/so2db.rb
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2012 Chad Taylor
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
|
+
# of this software and associated documentation files (the "Software"), to deal
|
6
|
+
# in the Software without restriction, including without limitation the rights
|
7
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
# copies of the Software, and to permit persons to whom the Software is
|
9
|
+
# furnished to do so, subject to the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be included in
|
12
|
+
# all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
20
|
+
# SOFTWARE.
|
21
|
+
|
22
|
+
module SO2DB
|
23
|
+
|
24
|
+
# Base class for StackOverflow data importers. Drives database setup and
|
25
|
+
# data importing files from a directory.
|
26
|
+
#
|
27
|
+
# Implementations of this class must provide a method with the following
|
28
|
+
# signature:
|
29
|
+
#
|
30
|
+
# import_stream(formatter)
|
31
|
+
#
|
32
|
+
# This method may be private. The purpose of this method is to actually
|
33
|
+
# perform the data import with data from the provided formatter. The
|
34
|
+
# formatter is provided to support scenarios of streaming data to STDIN
|
35
|
+
# (e.g., PostgreSQL's COPY command) as well as pushing data to a file before
|
36
|
+
# import (e.g., for MySQL's mysqlimport utility). It has type
|
37
|
+
# SO2DB::Formatter.
|
38
|
+
#
|
39
|
+
# The importer uses ActiveRecord for table creation and Foreigner for creating
|
40
|
+
# table relationships. You are limited to the databases supported by these
|
41
|
+
# libraries.  In addition, a 'uuid' method must be available to the adapter
|
42
|
+
# provided to ActiveRecord. (See so2pg for an example of an adapter extension
|
43
|
+
# that provides the method.)
|
44
|
+
#
|
45
|
+
# In addition, it provides two accessors for subclasses:
|
46
|
+
#
|
47
|
+
# attr_reader :conn_opts
|
48
|
+
# attr_accessor :format_delimiter
|
49
|
+
#
|
50
|
+
# The conn_opts property provides the ActiveRecord connection data (e.g.,
|
51
|
+
# :database, :host, etc.).  The format_delimiter property sets the delimiter used by
|
52
|
+
# the formatter. The delimiter is \v (0xB) by default.
|
53
|
+
class Importer
|
54
|
+
|
55
|
+
# Builds an importer and records its configuration.
#
# Arguments:
#   relations: (Boolean) Whether database relationships should be created.
#   optionals: (Boolean) Whether optional database tables and content should
#              be created.
#   adapter: (String) The ActiveRecord adapter name (e.g., 'postgresql').
#   options: (Hash) The database connection options, as required by
#            ActiveRecord for the provided adapter. The hash is not modified;
#            the adapter name is merged into a copy under :adapter.
def initialize(relations = false, optionals = false, adapter = '', options = {})
  @relations, @optionals = relations, optionals
  @conn_opts = options.merge(:adapter => adapter)

  # Character code 11 is the vertical tab (\v, 0x0B).
  @format_delimiter = 11.chr
end
|
71
|
+
|
72
|
+
# Runs the whole import: connects, creates the basic tables, loads the data
# files found in the given directory, and then creates whichever
# relationships and optional tables were requested at construction time.
#
# Arguments:
#   dir: (String) The directory path containing the StackOverflow data
#        dump XML files (e.g., badges.xml, posts.xml, etc.).
def import(dir)
  setup
  create_basics
  import_data(dir)

  with_relations = @relations
  with_optionals = @optionals

  create_relations if with_relations
  create_optionals if with_optionals

  # Relationships for the optional tables need both switches.
  create_optional_relations if with_relations && with_optionals
end
|
86
|
+
|
87
|
+
private
|
88
|
+
|
89
|
+
attr_reader :conn_opts
|
90
|
+
attr_accessor :format_delimiter
|
91
|
+
|
92
|
+
# Opens the ActiveRecord connection described by @conn_opts, then calls
# Foreigner.load (Foreigner is the library this importer uses for table
# relationships).
def setup
  ActiveRecord::Base.establish_connection(@conn_opts)
  Foreigner.load
end
|
96
|
+
|
97
|
+
# Runs the `up` step of the SO2DB::CreateBasicTables migration.
def create_basics
  migration = SO2DB::CreateBasicTables.new
  migration.up
end
|
100
|
+
|
101
|
+
# Imports every XML data file found directly inside the given directory.
#
# Each entry whose name ends in ".xml" is wrapped in a Formatter (configured
# with the current format delimiter) and handed to import_stream, which
# implementations of this class provide.
#
# Arguments:
#   dir: (String) Path of the directory holding the data dump files.
#
# Returns the list of file names that were imported.
def import_data(dir)
  # Require the dot: a bare 'xml' suffix would also match names such as
  # "backupxml" that are not XML data files.
  xml_files = Dir.entries(dir).select { |name| name.end_with?('.xml') }

  xml_files.each do |name|
    formatter = Formatter.new(File.join(dir, name), @format_delimiter)
    import_stream(formatter)
  end
end
|
108
|
+
|
109
|
+
# Runs the `up` step of the SO2DB::CreateRelationships migration.
def create_relations
  migration = SO2DB::CreateRelationships.new
  migration.up
end
|
112
|
+
|
113
|
+
# Runs the `up` step of the SO2DB::CreateOptionals migration.
def create_optionals
  migration = SO2DB::CreateOptionals.new
  migration.up
end
|
116
|
+
|
117
|
+
# Runs the `up` step of the SO2DB::CreateOptionalRelationships migration.
def create_optional_relations
  migration = SO2DB::CreateOptionalRelationships.new
  migration.up
end
|
120
|
+
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
require 'so2db/formatter'
|
125
|
+
require 'so2db/migrations'
|
126
|
+
require 'so2db/models'
|
data/lib/so2pg.rb
ADDED
@@ -0,0 +1,179 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2012 Chad Taylor
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
|
+
# of this software and associated documentation files (the "Software"), to deal
|
6
|
+
# in the Software without restriction, including without limitation the rights
|
7
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
# copies of the Software, and to permit persons to whom the Software is
|
9
|
+
# furnished to do so, subject to the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be included in
|
12
|
+
# all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
20
|
+
# SOFTWARE.
|
21
|
+
|
22
|
+
require 'optparse'
require 'shellwords'
require 'so2db'
|
24
|
+
|
25
|
+
module ActiveRecord::ConnectionAdapters
  class PostgreSQLAdapter < AbstractAdapter

    # Adds the 'uuid' method that SO2DB::Importer requires of the adapter it
    # is given (see SO2DB::Importer for more information). PostgreSQL has a
    # native 'uuid' type, so that name is returned together with an empty
    # options hash. (For MySQL, something a bit more contrived would be
    # needed, like CHAR(16).)
    #
    # Returns a two-element Array: ['uuid', {}].
    def uuid
      ['uuid', {}]
    end
  end
end
|
37
|
+
|
38
|
+
# Imports the StackOverflow data into a PostgreSQL database.
|
39
|
+
class PgImporter < SO2DB::Importer
|
40
|
+
|
41
|
+
# Builds a PostgreSQL importer. Forwards to SO2DB::Importer#initialize with
# the adapter name fixed to "postgresql"; see that method for the meaning of
# relations, optionals and options.
def initialize(relations = false, optionals = false, options = {})
  adapter_name = "postgresql"
  super(relations, optionals, adapter_name, options)
end
|
45
|
+
|
46
|
+
# Command-line entry point: parses the given arguments and, when they are
# valid, runs a full import and prints the elapsed time.
#
# Arguments:
#   argv: (Array) The command-line arguments to parse (e.g., ARGV).
#
# Returns nil. Nothing is imported when PgOptionsParser.parse returns nil,
# which it does after printing the help screen.
def self.import_from_argv(argv)
  # Parse the arguments that were passed in. Previously the global ARGV was
  # parsed and the argv parameter was silently ignored.
  cmd_opts = PgOptionsParser.parse(argv)

  # If validation did not pass, there is nothing to do.
  return unless cmd_opts

  start = Time.now
  importer = PgImporter.new(cmd_opts.has_key?(:relationships),
                            cmd_opts.has_key?(:optionals),
                            cmd_opts)
  importer.import(cmd_opts[:dir])
  puts "Import completed in #{Time.now - start}s"
end
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
# Loads the data supplied by the formatter into the PostgreSQL database,
# printing the file name before the load and the elapsed time after it.
#
# Note that what follows is just one way to implement the importer.  You
# could just as easily push the formatted data into a file and then ask
# the database to suck that file in.
def import_stream(formatter)
  puts "Importing file #{formatter.file_name}..."
  started_at = Time.now

  copy_sql = build_sql(formatter.value_str)
  shell_cmd = build_cmd(copy_sql)
  execute_cmd(shell_cmd, formatter)

  puts " -> #{Time.now - started_at}s"
end
|
78
|
+
|
79
|
+
# Builds the SQL command used for bulk loading the tables.
#
# The COPY delimiter is taken from format_delimiter, the same value that is
# handed to the Formatter, so the two stay in agreement if the delimiter is
# changed. With the default delimiter (\v, 0x0B) the statement is identical
# to the previously hard-coded one.
#
# Arguments:
#   value_str: (String) The table name and column list, e.g.
#              "badges(id,name)".
def build_sql(value_str)
  # Escape for a PostgreSQL E'' string constant: backslashes are doubled and
  # single quotes are doubled. Block forms avoid backslash interpretation in
  # the replacement text.
  delimiter = format_delimiter.gsub("\\") { "\\\\" }.gsub("'") { "''" }

  "COPY #{value_str} FROM STDIN WITH (FORMAT csv, DELIMITER E'#{delimiter}')"
end
|
83
|
+
|
84
|
+
# Builds the psql shell command for the given SQL command, using the
# connection options.
#
# Option values are shell-escaped, and the characters that remain special
# inside shell double quotes (backslash, double quote, dollar sign and
# backtick) are escaped in the SQL, so neither can break out of the command
# line. For values without special characters the result is unchanged.
#
# Example:
#   >> sql = "COPY ..."
#   >> puts build_cmd(sql)
#   => psql -d test -h localhost -c "COPY ..."
#
# Side effect: when a :password option is present, it is stored in
# ENV['PGPASSWORD'] for this process.
def build_cmd(sql)
  # NOTE(review): presumably set so the psql child process can pick the
  # password up from its environment -- confirm against psql's documentation.
  ENV['PGPASSWORD'] = conn_opts[:password] if conn_opts.has_key?(:password)

  parts = ["psql"]
  [[:database, "-d"], [:host, "-h"], [:username, "-U"], [:port, "-p"]].each do |key, flag|
    next unless conn_opts.has_key?(key)
    parts << flag << Shellwords.escape(conn_opts[key].to_s)
  end

  quoted_sql = sql.gsub(/[\\"`$]/) { |ch| "\\#{ch}" }
  parts << "-c \"#{quoted_sql}\""

  parts.join(" ")
end
|
105
|
+
|
106
|
+
# Runs the given shell command and lets the formatter write its data to the
# command's standard input. The write side is closed once the formatter has
# finished; nothing is read back from the command.
#
# Arguments:
#   cmd: (String) The shell command to run.
#   formatter: The object whose format method writes to the stream it is
#              given.
def execute_cmd(cmd, formatter)
  IO.popen(cmd, 'r+') do |pipe|
    formatter.format(pipe)
    pipe.close_write
  end
end
|
114
|
+
|
115
|
+
end
|
116
|
+
|
117
|
+
class PgOptionsParser
|
118
|
+
|
119
|
+
# Parses the command-line arguments into a Hash object.  Note that the members
# of the Hash have the same name as the ActiveRecord parameters (e.g., :host,
# :database, etc.).  This Hash will actually be passed to ActiveRecord for
# consumption.
#
# Arguments:
#   args: (Array) The command-line arguments. Recognized options are removed
#         from it, because OptionParser#parse! is used.
#
# Returns the options Hash, or nil after printing the help screen when help
# was requested or the required directory (-D) or database (-d) is missing.
def self.parse(args)
  options = {}

  # The block parameter is named opts; the parser itself gets a different
  # name so that the block does not shadow the outer local.
  parser = OptionParser.new do |opts|
    opts.banner = <<-EOB
Imports a StackOverflow data dump into a PostgreSQL database.
Usage: so2pg [options]
    EOB

    opts.on("-H", "--host HOST", "The database host") do |host|
      options[:host] = host
    end

    opts.on("-d", "--database DBNAME", "The name of the database (REQUIRED)") do |dbname|
      options[:database] = dbname
    end

    opts.on("-D", "--directory DIRECTORY", "The data directory path (REQUIRED)") do |dir|
      options[:dir] = dir
    end

    opts.on("-u", "--user USER", "The user name") do |user|
      options[:username] = user
    end

    opts.on("-p", "--password PASSWORD", "The user's password") do |password|
      options[:password] = password
    end

    opts.on("-P", "--port PORT_NUMBER", "The port number") do |port|
      options[:port] = port
    end

    opts.on("-O", "--include-optionals", "Includes optional tables") do
      options[:optionals] = true
    end

    opts.on("-R", "--include-relationships", "Includes table relationships") do
      options[:relationships] = true
    end

    opts.on("-h", "--help", "Show this help screen") do
      options[:help] = true
    end
  end

  parser.parse!(args)

  missing_required = !options.has_key?(:dir) || !options.has_key?(:database)
  if options[:help] || missing_required
    puts parser.help
    nil
  else
    options
  end
end
|
178
|
+
|
179
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'mocha'
|
3
|
+
require 'so2db'
|
4
|
+
|
5
|
+
# Unit tests for SO2DB::Formatter. Collaborators are replaced with mocha
# mocks and stubs, so no real data files are read.
class FormatterTest < Test::Unit::TestCase
  # NOTE(review): no Rake DSL method is used in this class -- confirm whether
  # this include is still needed.
  include Rake::DSL

  def setup
    @formatter = SO2DB::Formatter.new("/tmp/badges.xml")
  end

  # With no arguments the path is empty and the delimiter is character 11.
  def test_initializer_with_default_args
    f = SO2DB::Formatter.new
    assert_equal '', f.instance_variable_get(:@path)
    assert_equal 11.chr.to_s, f.instance_variable_get(:@delimiter)
  end

  def test_initializer_with_provided_args
    path = '/my/test/path'
    delimiter = 12.chr.to_s
    f = SO2DB::Formatter.new(path, delimiter)

    assert_equal path, f.instance_variable_get(:@path)
    assert_equal delimiter, f.instance_variable_get(:@delimiter)
  end

  # format looks up the required attributes for the file, opens it and
  # delegates to format_from_stream, returning that method's result.
  def test_format
    file = "file"
    outstream = "outstream"
    attrs = [ :a, :b, :c ]
    SO2DB::Models::Lookup.expects(:get_required_attrs).with("badges").once.returns(attrs)
    File.expects(:open).with("/tmp/badges.xml").once.returns(file)
    @formatter.expects(:format_from_stream).with(file, attrs, outstream).once.returns("x")

    result = @formatter.format(outstream)
    assert_equal "x", result
  end

  def test_format_from_stream
    x = <<-eoxml
<data>
<fake-row id="3"/>
<row Id="1" UserId="2" Name="Autobiographer" Date="2010-07-20T19:07:22.990" />
</data>
    eoxml

    r, w = IO.pipe
    begin
      arr = [ "Id", "UserId", "Name", "Date", "Missing" ]
      @formatter.send(:format_from_stream, x, arr, w)

      values = [ '2010-07-20T19:07:22.990', '1', '', 'Autobiographer', '2' ]
      expected = values.join(11.chr.to_s) << "\n"
      actual = r.gets

      assert_equal expected, actual
    ensure
      # Close both pipe ends so the test does not leak file descriptors.
      r.close unless r.closed?
      w.close unless w.closed?
    end
  end

  def test_file_name
    assert_equal "badges.xml", @formatter.file_name
  end

  def test_value_str
    assert_equal "badges(date,id,name,user_id)", @formatter.value_str
  end

  # Builds a mock XML reader node that answers name and node_type.
  def create_node_stub(name, node_type)
    obj = mock()
    obj.stubs(:name).returns(name)
    obj.stubs(:node_type).returns(node_type)

    obj
  end

  def test_element_start_with_good_values
    node = create_node_stub("row", Nokogiri::XML::Reader::TYPE_ELEMENT)
    assert @formatter.send(:element_start?, node)
  end

  def test_element_start_with_invalid_type
    node = create_node_stub("row", Nokogiri::XML::Reader::TYPE_END_ELEMENT)
    assert_equal false, @formatter.send(:element_start?, node)
  end

  def test_element_start_with_invalid_name
    node = create_node_stub("badges", Nokogiri::XML::Reader::TYPE_ELEMENT)
    assert_equal false, @formatter.send(:element_start?, node)
  end

  def test_format_node
    node = mock()
    node.stubs(:attribute).with("Id").returns("1")
    node.stubs(:attribute).with("Name").returns("Anony Mous")

    @formatter.expects(:scrub).with("1").once.returns("1")
    @formatter.expects(:scrub).with("Anony Mous").once.returns("Anony Mous")

    result = @formatter.send(:format_node, node, [ "Id", "Name" ])

    assert_equal "1\vAnony Mous", result
  end

  # A missing attribute yields an empty field and is never passed to scrub.
  def test_format_node_with_missing_attribute
    node = mock()
    node.stubs(:attribute).with("Id").returns("1")
    node.stubs(:attribute).with("Name").returns(nil)

    @formatter.expects(:scrub).once.with("1").returns("1")

    result = @formatter.send(:format_node, node, [ "Id", "Name" ])

    assert_equal "1\v", result
  end

  def test_scrub
    assert_equal '<asdffdsa>', @formatter.send(:scrub, "<asdf\nfdsa\r>")
  end

end
|
120
|
+
|
data/test/test_models.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'so2db'
|
3
|
+
|
4
|
+
# Unit tests for SO2DB::Models::Lookup.
class LookupTest < Test::Unit::TestCase
  # NOTE(review): no Rake DSL method is used in this class -- confirm whether
  # this include is still needed.
  include Rake::DSL

  def test_lookup_badges
    assert_equal SO2DB::Models::Badge, SO2DB::Models::Lookup.find_class("badges")
  end

  def test_required_attrs_badges
    attrs = SO2DB::Models::Lookup.get_required_attrs("badges")
    assert_equal [ "Id", "UserId", "Name", "Date" ], attrs
  end

  # :revision_guid must be exported as "RevisionGUID", not "RevisionGuid".
  def test_guid_capitalization
    attrs = SO2DB::Models::Lookup.get_required_attrs("posthistory")
    assert attrs.include?('RevisionGUID')
  end
end
|
data/test/test_so2db.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'mocha'
|
3
|
+
require 'so2db'
|
4
|
+
|
5
|
+
# Unit tests for SO2DB::Importer.
class ImporterTest < Test::Unit::TestCase

  # Only the XML entry of the directory listing is wrapped in a Formatter
  # (with the default delimiter) and passed on to import_stream.
  def test_import_data
    importer = SO2DB::Importer.new
    listing = [ 'test.bak', 'test.xml' ]

    Dir.expects(:entries).once.with('/tmp').returns(listing)
    SO2DB::Formatter.expects(:new).once.with('/tmp/test.xml', 11.chr.to_s).returns('formatter')
    importer.expects(:import_stream).once.with('formatter')

    importer.send(:import_data, '/tmp')
  end

end
|
data/test/test_so2pg.rb
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'mocha'
|
3
|
+
require 'so2pg'
|
4
|
+
|
5
|
+
# Unit tests for PgImporter. Private methods are exercised through send.
class PgImporterTest < Test::Unit::TestCase

  def setup
    @importer = PgImporter.new(true, true, { :database => "dbname", :dir => "dir" })
  end

  def test_import_stream
    formatter = mock()
    formatter.stubs(:file_name).returns("file_name")
    formatter.expects(:value_str).once.returns('badges(id,name)')

    @importer.expects(:build_sql).once.with('badges(id,name)').returns("COPY...")
    @importer.expects(:build_cmd).once.with("COPY...").returns("cmd")
    # No extra stubs(:execute_cmd) here: a stub would let additional calls
    # through and so weaken the "once" expectation.
    @importer.expects(:execute_cmd).once.with("cmd", formatter).returns('')

    $stdout.stubs(:puts).returns('')

    @importer.send(:import_stream, formatter)
  end

  def test_build_sql
    exp = "COPY badges(id,name) FROM STDIN WITH (FORMAT csv, DELIMITER E'\x0B')"
    assert_equal exp, @importer.send(:build_sql, "badges(id,name)")
  end

  def test_build_cmd
    expected = "psql -d dbname -c \"COPY...\""
    actual = @importer.send(:build_cmd, "COPY...")

    assert_equal expected, actual
  end

  def test_build_cmd_sets_env_password
    previous = ENV['PGPASSWORD']
    importer = PgImporter.new(true, true, { :database => "dbname",
                                            :dir => "dir",
                                            :password => "asdf1234" })

    importer.send(:build_cmd, "COPY...")
    assert_equal "asdf1234", ENV['PGPASSWORD']
  ensure
    # Restore the environment so the password does not leak into other tests
    # (assigning nil removes the variable).
    ENV['PGPASSWORD'] = previous
  end

  def test_execute_cmd
    strm = mock()
    formatter = mock()

    IO.expects(:popen).once.with("cmd", "r+").yields(strm)
    formatter.expects(:format).once.with(strm)
    strm.expects(:close_write).once

    @importer.send(:execute_cmd, "cmd", formatter)
  end

end
|
59
|
+
|
60
|
+
# Unit tests for PgOptionsParser.parse.
class PgOptionsParserTest < Test::Unit::TestCase

  def test_all_options
    host = 'localhost'
    database = 'database_name'
    directory = 'data_directory'
    user = 'anony'
    password = 'mous'
    port = '1234'

    cmd = [ '-H', host, '-d', database, '-D', directory, '-u', user,
            '-p', password, '-P', port, '-O', '-R' ]

    options = PgOptionsParser.parse(cmd)

    assert_equal host, options[:host]
    assert_equal database, options[:database]
    assert_equal directory, options[:dir]
    assert_equal user, options[:username]
    assert_equal password, options[:password]
    assert_equal port, options[:port]
    assert options[:optionals]
    assert options[:relationships]
  end

  # Asserts that parsing cmd prints something to $stdout and returns nil.
  def assert_help_displayed(cmd)
    $stdout.expects(:puts).returns('')
    options = PgOptionsParser.parse(cmd)

    assert_nil options
  end

  def test_options_without_database
    assert_help_displayed [ '-D', 'data_dir' ]
  end

  def test_options_without_data_dir
    assert_help_displayed [ '-d', 'database_name' ]
  end

  def test_options_with_help
    assert_help_displayed [ '-D', 'data_dir', '-d', 'database_name', '-h' ]
  end

  def test_options_without_optionals
    cmd = [ '-D', 'data_dir', '-d', 'database_name', '-R' ]
    options = PgOptionsParser.parse(cmd)

    assert options[:relationships]
    assert !options[:optionals]
  end

  def test_options_without_relationships
    cmd = [ '-D', 'data_dir', '-d', 'database_name', '-O' ]
    options = PgOptionsParser.parse(cmd)

    assert !options[:relationships]
    assert options[:optionals]
  end

end
|