so2db 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +8 -0
- data/Gemfile.lock +37 -0
- data/MIT-LICENSE +8 -0
- data/README.md +101 -0
- data/Rakefile +8 -0
- data/bin/so2pg +5 -0
- data/lib/so2db/extensions.rb +42 -0
- data/lib/so2db/formatter.rb +113 -0
- data/lib/so2db/migrations.rb +308 -0
- data/lib/so2db/models.rb +106 -0
- data/lib/so2db.rb +126 -0
- data/lib/so2pg.rb +179 -0
- data/test/test_formatter.rb +120 -0
- data/test/test_models.rb +20 -0
- data/test/test_so2db.rb +17 -0
- data/test/test_so2pg.rb +120 -0
- metadata +163 -0
data/lib/so2db/models.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2012 Chad Taylor
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
|
+
# of this software and associated documentation files (the "Software"), to deal
|
6
|
+
# in the Software without restriction, including without limitation the rights
|
7
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
# copies of the Software, and to permit persons to whom the Software is
|
9
|
+
# furnished to do so, subject to the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be included in
|
12
|
+
# all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
20
|
+
# SOFTWARE.
|
21
|
+
|
22
|
+
require 'active_record'
|
23
|
+
|
24
|
+
module SO2DB::Models

  # Each model with an +exported_fields+ class method lists, as snake_case
  # symbols, the fields that Lookup.get_required_attrs converts into data
  # dump attribute names.

  class Badge < ActiveRecord::Base
    # Returns the exported field names for badges.
    def self.exported_fields
      [ :id, :user_id, :name, :date ]
    end
  end

  class Comment < ActiveRecord::Base
    # Returns the exported field names for comments.
    def self.exported_fields
      [ :id, :post_id, :score, :text, :creation_date, :user_id,
        :user_display_name ]
    end
  end

  class Post < ActiveRecord::Base
    # Returns the exported field names for posts.
    def self.exported_fields
      [ :id, :post_type_id, :parent_id, :accepted_answer_id,
        :creation_date, :score, :view_count, :body, :owner_user_id,
        :last_editor_user_id, :last_editor_display_name, :last_edit_date,
        :last_activity_date, :community_owned_date, :closed_date, :title,
        :tags, :answer_count, :comment_count, :favorite_count,
        :owner_display_name ]
    end
  end

  class PostHistory < ActiveRecord::Base
    # The table name is set explicitly instead of relying on ActiveRecord's
    # inferred name.
    self.table_name = "post_history"

    # Returns the exported field names for post history entries.
    def self.exported_fields
      [ :id, :post_history_type_id, :post_id, :revision_guid,
        :creation_date, :user_id, :user_display_name, :comment, :text,
        :close_reason_id ]
    end
  end

  class User < ActiveRecord::Base
    # Returns the exported field names for users.
    def self.exported_fields
      [ :id, :reputation, :creation_date, :display_name, :email_hash,
        :last_access_date, :website_url, :location, :age, :about_me,
        :views, :up_votes, :down_votes ]
    end
  end

  class Vote < ActiveRecord::Base
    # Returns the exported field names for votes.
    def self.exported_fields
      [ :id, :post_id, :vote_type_id, :creation_date, :user_id,
        :bounty_amount ]
    end
  end

  # The following models declare no exported fields and are not registered
  # in Lookup.
  class PostType < ActiveRecord::Base
  end

  class PostHistoryType < ActiveRecord::Base
  end

  class CloseReason < ActiveRecord::Base
  end

  class VoteType < ActiveRecord::Base
  end

  # Infrastructure.  Do not call this from your code.
  #
  # Resolves a data set name (e.g. "badges") to its model class and to the
  # attribute names that have to be read for it.
  class Lookup

    # Data set name => model constant name.  A frozen constant is used instead
    # of a class variable so the table cannot be shared with, or modified
    # through, subclasses.
    MAP = { "badges" => :Badge, "comments" => :Comment,
            "posthistory" => :PostHistory, "posts" => :Post, "users" => :User,
            "votes" => :Vote }.freeze

    # Returns the model class registered for +file_name+.
    #
    # Raises NameError when no model is registered for the name.  (The lookup
    # of an unknown name already ended in a NameError before; it now carries a
    # message that names the offending input.)
    def self.find_class(file_name)
      const_name = MAP[file_name]
      if const_name.nil?
        raise NameError, "no SO2DB model is registered for " \
                         "#{file_name.inspect} (known: #{MAP.keys.sort.join(', ')})"
      end

      SO2DB::Models.const_get(const_name)
    end

    # Returns the exported fields of the model registered for +file_name+ as
    # camel-cased attribute names (e.g. :user_id becomes "UserId").  A "Guid"
    # segment is upper-cased, so :revision_guid becomes "RevisionGUID".
    def self.get_required_attrs(file_name)
      find_class(file_name).exported_fields.map do |field|
        field.to_s.camelize.sub(/Guid/, 'GUID')
      end
    end

  end

end
|
data/lib/so2db.rb
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2012 Chad Taylor
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
|
+
# of this software and associated documentation files (the "Software"), to deal
|
6
|
+
# in the Software without restriction, including without limitation the rights
|
7
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
# copies of the Software, and to permit persons to whom the Software is
|
9
|
+
# furnished to do so, subject to the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be included in
|
12
|
+
# all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
20
|
+
# SOFTWARE.
|
21
|
+
|
22
|
+
module SO2DB

  # Base class for StackOverflow data importers.  Drives database setup and
  # data importing files from a directory.
  #
  # Implementations of this class must provide a method with the following
  # signature:
  #
  #   import_stream(formatter)
  #
  # This method may be private.  The purpose of this method is to actually
  # perform the data import with data from the provided formatter.  The
  # formatter is provided to support scenarios of streaming data to STDIN
  # (e.g., PostgreSQL's COPY command) as well as pushing data to a file before
  # import (e.g., for MySQL's mysqlimport utility).  It has type
  # SO2DB::Formatter.
  #
  # The importer uses ActiveRecord for table creation and Foreigner for creating
  # table relationships.  You are limited to the databases supported by these
  # libraries.  In addition, a 'uuid' method must be available to the adapter
  # provided to ActiveRecord.  (See so2pg for an example of an adapter extension
  # that provides the method.)
  #
  # In addition, it provides two (private) accessors for subclasses:
  #
  #   attr_reader :conn_opts
  #   attr_accessor :format_delimiter
  #
  # The conn_opts property provides the ActiveRecord connection data (e.g.,
  # :database, :host, etc.).  The format_delimiter property sets the delimiter
  # handed to the formatter.  The delimiter is \v (0xB) by default.
  class Importer

    # Initializes the importer.
    #
    # Arguments:
    #   relations: (Boolean) Indicates whether database relationships should
    #              be created.
    #   optionals: (Boolean) Indicates whether optional database tables and
    #              content should be created.
    #   adapter: (String) The ActiveRecord adapter name (e.g., 'postgresql').
    #   options: (Hash) The database connection options, as required by
    #            ActiveRecord for the provided adapter.
    def initialize(relations = false, optionals = false, adapter = '', options = {})
      @relations = relations
      @optionals = optionals
      # merge returns a new Hash, so the caller's options are left untouched;
      # the adapter argument wins over any :adapter key in options.
      @conn_opts = options.merge(:adapter => adapter)
      # Character code 11 is the vertical tab (\v, 0x0B).
      @format_delimiter = 11.chr
    end

    # Creates the database tables and relationships, and imports the data in
    # the files in the specified directory.
    #
    # Arguments:
    #   dir: (String) The directory path containing the StackOverflow data
    #        dump XML files (e.g., badges.xml, posts.xml, etc.).
    def import(dir)
      setup
      create_basics
      import_data(dir)
      # Relationships are created only after the data has been imported.
      create_relations if @relations
      create_optionals if @optionals
      create_optional_relations if @relations && @optionals
    end

    private

    attr_reader :conn_opts
    attr_accessor :format_delimiter

    # Connects ActiveRecord using the stored connection options and loads
    # Foreigner.
    def setup
      ActiveRecord::Base.establish_connection @conn_opts
      Foreigner.load
    end

    # Runs the migration that creates the basic tables.
    def create_basics
      SO2DB::CreateBasicTables.new.up
    end

    # Hands one formatter per XML file found directly in +dir+ to
    # import_stream (which subclasses must implement).
    #
    # Only entries whose name ends in ".xml" are imported; matching on the
    # bare suffix "xml" would also pick up names such as "fooxml".
    def import_data(dir)
      xml_files = Dir.entries(dir).select { |entry| entry.end_with?('.xml') }
      xml_files.each do |file_name|
        import_stream Formatter.new(File.join(dir, file_name), @format_delimiter)
      end
    end

    # Runs the migration that creates the table relationships.
    def create_relations
      SO2DB::CreateRelationships.new.up
    end

    # Runs the migration that creates the optional tables and content.
    def create_optionals
      SO2DB::CreateOptionals.new.up
    end

    # Runs the migration that creates the relationships of the optional tables.
    def create_optional_relations
      SO2DB::CreateOptionalRelationships.new.up
    end

  end
end
|
123
|
+
|
124
|
+
require 'so2db/formatter'
|
125
|
+
require 'so2db/migrations'
|
126
|
+
require 'so2db/models'
|
data/lib/so2pg.rb
ADDED
@@ -0,0 +1,179 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2012 Chad Taylor
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
|
+
# of this software and associated documentation files (the "Software"), to deal
|
6
|
+
# in the Software without restriction, including without limitation the rights
|
7
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
# copies of the Software, and to permit persons to whom the Software is
|
9
|
+
# furnished to do so, subject to the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be included in
|
12
|
+
# all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
20
|
+
# SOFTWARE.
|
21
|
+
|
22
|
+
require 'optparse'
require 'shellwords'
require 'so2db'
|
24
|
+
|
25
|
+
module ActiveRecord::ConnectionAdapters
  class PostgreSQLAdapter < AbstractAdapter

    # Adds the 'uuid' method that the importer requires of its adapter (see
    # SO2DB::Importer for more information).  PostgreSQL has a native 'uuid'
    # type, so that name is returned together with an empty options Hash.
    def uuid
      ['uuid', {}]
    end
  end
end
|
37
|
+
|
38
|
+
# Imports the StackOverflow data into a PostgreSQL database.
class PgImporter < SO2DB::Importer

  # (See SO2DB::Importer.initialize documentation)
  #
  # The adapter is fixed to "postgresql".
  def initialize(relations = false, optionals = false, options = {})
    super(relations, optionals, "postgresql", options)
  end

  # Parses the given command-line arguments and, when they are valid, runs the
  # import and prints the elapsed time.
  #
  # Arguments:
  #   argv: (Array) The command-line arguments (typically ARGV).  These are
  #         the arguments that get parsed; the global ARGV is not consulted.
  def self.import_from_argv(argv)
    # Parse the command-line options; nil means the parser already printed
    # the usage screen and there is nothing to import.
    cmd_opts = PgOptionsParser.parse(argv)
    return unless cmd_opts

    start = Time.now
    # The same Hash doubles as the connection options.
    pg = new(cmd_opts.has_key?(:relationships),
             cmd_opts.has_key?(:optionals),
             cmd_opts)
    pg.import(cmd_opts[:dir])
    puts "Import completed in #{Time.now - start}s"
  end

  private

  # Maps connection option keys to the psql flag that carries them, in the
  # order in which they are appended to the command line.
  PSQL_FLAGS = [ [ :database, "-d" ], [ :host, "-h" ],
                 [ :username, "-U" ], [ :port, "-p" ] ].freeze

  # Imports the data from the formatter into the PostgreSQL database.
  #
  # Note that what follows is just one way to implement the importer.  You
  # could just as easily push the formatted data into a file and then ask
  # the database to suck that file in.
  def import_stream(formatter)
    puts "Importing file #{formatter.file_name}..."
    start = Time.now

    sql = build_sql(formatter.value_str)
    cmd = build_cmd(sql)
    execute_cmd(cmd, formatter)

    puts " -> #{Time.now - start}s"
  end

  # Builds the SQL command used for bulk loading the tables.
  #
  # NOTE(review): the delimiter is hard-coded to \x0B here and does not follow
  # format_delimiter; the two only agree while the default is in use.
  def build_sql(value_str)
    "COPY #{value_str} FROM STDIN WITH (FORMAT csv, DELIMITER E'\x0B')"
  end

  # Builds the import command with the given SQL command and the global
  # connection options.  Connection values are shell-escaped so that values
  # containing spaces or shell metacharacters stay a single argument.
  #
  # Example:
  #   >> sql = "COPY ..."
  #   >> puts build_cmd(sql)
  #   => psql -d test -h localhost -c "COPY ..."
  def build_cmd(sql)
    # Set on this process so that the psql child process inherits it.
    ENV['PGPASSWORD'] = conn_opts[:password] if conn_opts.has_key? :password

    parts = [ "psql" ]
    PSQL_FLAGS.each do |key, flag|
      next unless conn_opts.has_key?(key)
      parts << "#{flag} #{Shellwords.escape(conn_opts[key].to_s)}"
    end
    # The SQL is generated internally and is passed inside double quotes.
    parts << "-c \"#{sql}\""

    parts.join(" ")
  end

  # Executes the provided shell command and pumps the data from the formatter
  # to it over stdin.
  def execute_cmd(cmd, formatter)
    IO.popen(cmd, 'r+') do |pipe|
      formatter.format(pipe)
      # Closing the write end signals end-of-input to the command.
      pipe.close_write
    end
  end

end
|
116
|
+
|
117
|
+
class PgOptionsParser

  # Parses the command-line arguments into a Hash object.  Note that the members
  # of the Hash have the same name as the ActiveRecord parameters (e.g., :host,
  # :database, etc.).  This Hash will actually be passed to ActiveRecord for
  # consumption.
  #
  # Arguments:
  #   args: (Array) The command-line arguments.  Recognized options are
  #         removed from this array (OptionParser#parse!).
  #
  # Returns the options Hash, or nil after printing the usage screen when help
  # was requested, when the directory or database option is missing, or when
  # the arguments could not be parsed (the parse error is written to stderr).
  def self.parse(args)
    options = {}

    parser = OptionParser.new do |o|
      o.banner = "Imports a StackOverflow data dump into a PostgreSQL database.\n" \
                 "Usage: so2pg [options]\n"

      o.on("-H", "--host HOST", "The database host") do |host|
        options[:host] = host
      end

      o.on("-d", "--database DBNAME", "The name of the database (REQUIRED)") do |dbname|
        options[:database] = dbname
      end

      o.on("-D", "--directory DIRECTORY", "The data directory path (REQUIRED)") do |dir|
        options[:dir] = dir
      end

      o.on("-u", "--user USER", "The user name") do |user|
        options[:username] = user
      end

      o.on("-p", "--password PASSWORD", "The user's password") do |password|
        options[:password] = password
      end

      o.on("-P", "--port PORT_NUMBER", "The port number") do |port|
        options[:port] = port
      end

      o.on("-O", "--include-optionals", "Includes optional tables") do
        options[:optionals] = true
      end

      o.on("-R", "--include-relationships", "Includes table relationships") do
        options[:relationships] = true
      end

      o.on("-h", "--help", "Show this help screen") do
        options[:help] = true
      end
    end

    begin
      parser.parse!(args)
    rescue OptionParser::ParseError => e
      # Unknown options or missing option arguments are usage errors: report
      # them and show the help screen instead of letting the exception escape.
      $stderr.puts e.message
      puts parser.help
      return nil
    end

    if options[:help] || !options.has_key?(:dir) || !options.has_key?(:database)
      puts parser.help
      nil
    else
      options
    end
  end

end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'mocha'
|
3
|
+
require 'so2db'
|
4
|
+
|
5
|
+
# Unit tests for SO2DB::Formatter.  Collaborators are replaced with Mocha
# expectations, so no real files are read.
class FormatterTest < Test::Unit::TestCase
  include Rake::DSL

  def setup
    @formatter = SO2DB::Formatter.new("/tmp/badges.xml")
  end

  def test_initializer_with_default_args
    f = SO2DB::Formatter.new
    assert_equal '', f.instance_variable_get(:@path)
    assert_equal 11.chr.to_s, f.instance_variable_get(:@delimiter)
  end

  def test_initializer_with_provided_args
    path = '/my/test/path'
    delimiter = 12.chr.to_s
    f = SO2DB::Formatter.new(path, delimiter)

    assert_equal path, f.instance_variable_get(:@path)
    assert_equal delimiter, f.instance_variable_get(:@delimiter)
  end

  # format is expected to look up the attributes for "badges", open the file
  # and return whatever format_from_stream returns.
  def test_format
    file = "file"
    outstream = "outstream"
    attrs = [ :a, :b, :c ]
    SO2DB::Models::Lookup.expects(:get_required_attrs).with("badges").once.returns(attrs)
    File.expects(:open).with("/tmp/badges.xml").once.returns(file)
    @formatter.expects(:format_from_stream).with(file, attrs, outstream).once.returns("x")

    result = @formatter.format(outstream)
    assert_equal "x", result
  end

  # Only the <row> element is expected to produce output, and the attribute
  # that is absent from it ("Missing") is expected to come out as an empty
  # value.
  def test_format_from_stream
    x = <<-eoxml
      <data>
        <fake-row id="3"/>
        <row Id="1" UserId="2" Name="Autobiographer" Date="2010-07-20T19:07:22.990" />
      </data>
    eoxml

    r, w = IO.pipe
    begin
      arr = [ "Id", "UserId", "Name", "Date", "Missing" ]
      @formatter.send(:format_from_stream, x, arr, w)

      values = [ '2010-07-20T19:07:22.990', '1', '', 'Autobiographer', '2' ]
      expected = values.join(11.chr.to_s) << "\n"
      actual = r.gets

      assert_equal expected, actual
    ensure
      # Release both pipe ends so the test does not leak file descriptors.
      [ r, w ].each { |io| io.close unless io.closed? }
    end
  end

  def test_file_name
    assert_equal "badges.xml", @formatter.file_name
  end

  def test_value_str
    assert_equal "badges(date,id,name,user_id)", @formatter.value_str
  end

  # Builds a mock XML reader node that answers :name and :node_type.
  def create_node_stub(name, node_type)
    obj = mock()
    obj.stubs(:name).returns(name)
    obj.stubs(:node_type).returns(node_type)

    obj
  end

  def test_element_start_with_good_values
    node = create_node_stub("row", Nokogiri::XML::Reader::TYPE_ELEMENT)
    assert @formatter.send(:element_start?, node)
  end

  def test_element_start_with_invalid_type
    node = create_node_stub("row", Nokogiri::XML::Reader::TYPE_END_ELEMENT)
    assert_equal false, @formatter.send(:element_start?, node)
  end

  def test_element_start_with_invalid_name
    node = create_node_stub("badges", Nokogiri::XML::Reader::TYPE_ELEMENT)
    assert_equal false, @formatter.send(:element_start?, node)
  end

  def test_format_node
    node = mock()
    node.stubs(:attribute).with("Id").returns("1")
    node.stubs(:attribute).with("Name").returns("Anony Mous")

    @formatter.expects(:scrub).with("1").once.returns("1")
    @formatter.expects(:scrub).with("Anony Mous").once.returns("Anony Mous")

    result = @formatter.send(:format_node, node, [ "Id", "Name" ])

    assert_equal "1\vAnony Mous", result
  end

  # scrub is expected to be called for the present attribute only.
  def test_format_node_with_missing_attribute
    node = mock()
    node.stubs(:attribute).with("Id").returns("1")
    node.stubs(:attribute).with("Name").returns(nil)

    @formatter.expects(:scrub).once.with("1").returns("1")

    result = @formatter.send(:format_node, node, [ "Id", "Name" ])

    assert_equal "1\v", result
  end

  def test_scrub
    assert_equal '<asdffdsa>', @formatter.send(:scrub, "<asdf\nfdsa\r>")
  end

end
|
120
|
+
|
data/test/test_models.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'so2db'
|
3
|
+
|
4
|
+
# Unit tests for SO2DB::Models::Lookup.
class LookupTest < Test::Unit::TestCase
  include Rake::DSL

  # "badges" resolves to the Badge model class.
  def test_lookup_badges
    found = SO2DB::Models::Lookup.find_class("badges")
    assert_equal SO2DB::Models::Badge, found
  end

  # Exported fields come back camel-cased, in declaration order.
  def test_required_attrs_badges
    expected = [ "Id", "UserId", "Name", "Date" ]
    assert_equal expected, SO2DB::Models::Lookup.get_required_attrs("badges")
  end

  # The "Guid" suffix is upper-cased in the attribute name.
  def test_guid_capitalization
    names = SO2DB::Models::Lookup.get_required_attrs("posthistory")
    assert names.include?('RevisionGUID')
  end
end
|
data/test/test_so2db.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'mocha'
|
3
|
+
require 'so2db'
|
4
|
+
|
5
|
+
# Unit tests for SO2DB::Importer.
class ImporterTest < Test::Unit::TestCase

  # Of the two directory entries only the XML file is expected to be wrapped
  # in a formatter (with the default \v delimiter) and passed to
  # import_stream.
  def test_import_data
    subject = SO2DB::Importer.new
    default_delimiter = 11.chr.to_s

    Dir.expects(:entries).once.with('/tmp').returns([ 'test.bak', 'test.xml' ])
    SO2DB::Formatter.expects(:new).once
      .with('/tmp/test.xml', default_delimiter).returns('formatter')
    subject.expects(:import_stream).once.with('formatter')

    subject.send(:import_data, '/tmp')
  end

end
|
data/test/test_so2pg.rb
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'mocha'
|
3
|
+
require 'so2pg'
|
4
|
+
|
5
|
+
# Unit tests for PgImporter.  The private methods are exercised through send.
class PgImporterTest < Test::Unit::TestCase

  def setup
    @importer = PgImporter.new(true, true, { :database => "dbname", :dir => "dir" })
  end

  # import_stream is expected to chain value_str -> build_sql -> build_cmd ->
  # execute_cmd, passing each result to the next step.
  def test_import_stream
    formatter = mock()
    formatter.stubs(:file_name).returns("file_name")
    formatter.expects(:value_str).once.returns('badges(id,name)')

    @importer.expects(:build_sql).once.with('badges(id,name)').returns("COPY...")
    @importer.expects(:build_cmd).once.with("COPY...").returns("cmd")
    @importer.stubs(:execute_cmd).returns('')
    @importer.expects(:execute_cmd).once.with("cmd", formatter).returns('')

    # Silence the progress output.
    $stdout.stubs(:puts).returns('')

    @importer.send(:import_stream, formatter)
  end

  def test_build_sql
    exp = "COPY badges(id,name) FROM STDIN WITH (FORMAT csv, DELIMITER E'\x0B')"
    assert_equal exp, @importer.send(:build_sql, "badges(id,name)")
  end

  def test_build_cmd
    expected = "psql -d dbname -c \"COPY...\""
    actual = @importer.send(:build_cmd, "COPY...")

    assert_equal expected, actual
  end

  def test_build_cmd_sets_env_password
    # build_cmd writes to the process environment; remember the previous value
    # so the password does not leak into whatever runs after this test.
    previous = ENV['PGPASSWORD']
    importer = PgImporter.new(true, true, { :database => "dbname",
                                            :dir => "dir",
                                            :password => "asdf1234" })

    importer.send(:build_cmd, "COPY...")
    assert_equal "asdf1234", ENV['PGPASSWORD']
  ensure
    # Assigning nil removes the variable again when it was not set before.
    ENV['PGPASSWORD'] = previous
  end

  # execute_cmd is expected to open the command read/write, let the formatter
  # write into the stream and then close the write end.
  def test_execute_cmd
    strm = mock()
    formatter = mock()

    IO.expects(:popen).once.with("cmd", "r+").yields(strm)
    formatter.expects(:format).once.with(strm)
    strm.expects(:close_write).once

    @importer.send(:execute_cmd, "cmd", formatter)
  end

end
|
59
|
+
|
60
|
+
# Unit tests for PgOptionsParser.parse.
class PgOptionsParserTest < Test::Unit::TestCase

  # Every value option ends up under its key; both flags are set.
  def test_all_options
    expected = { :host => 'localhost', :database => 'database_name',
                 :dir => 'data_directory', :username => 'anony',
                 :password => 'mous', :port => '1234' }

    argv = [ '-H', expected[:host], '-d', expected[:database],
             '-D', expected[:dir], '-u', expected[:username],
             '-p', expected[:password], '-P', expected[:port], '-O', '-R' ]

    parsed = PgOptionsParser.parse(argv)

    [ :host, :database, :dir, :username, :password, :port ].each do |key|
      assert_equal expected[key], parsed[key]
    end
    assert parsed[:optionals]
    assert parsed[:relationships]
  end

  # Asserts that parsing +cmd+ prints the help text and returns nil.
  def assert_help_displayed(cmd)
    $stdout.expects(:puts).returns('')

    assert_nil PgOptionsParser.parse(cmd)
  end

  def test_options_without_database
    assert_help_displayed [ '-D', 'data_dir' ]
  end

  def test_options_without_data_dir
    assert_help_displayed [ '-d', 'database_name' ]
  end

  def test_options_with_help
    assert_help_displayed [ '-D', 'data_dir', '-d', 'database_name', '-h' ]
  end

  def test_options_without_optionals
    parsed = PgOptionsParser.parse([ '-D', 'data_dir', '-d', 'database_name', '-R' ])

    assert parsed[:relationships]
    assert !parsed[:optionals]
  end

  def test_options_without_relationships
    parsed = PgOptionsParser.parse([ '-D', 'data_dir', '-d', 'database_name', '-O' ])

    assert !parsed[:relationships]
    assert parsed[:optionals]
  end

end
|